[llvm] 3e79847 - [LegalizeDAG][RISCV] Don't promote f16 vector ISD::FNEG/FABS/FCOPYSIGN to f32 when we don't have Zvfh. (#106652)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 3 22:44:53 PDT 2024
Author: Craig Topper
Date: 2024-09-03T22:44:49-07:00
New Revision: 3e798476de466e8a051d3e753db379731a8d9705
URL: https://github.com/llvm/llvm-project/commit/3e798476de466e8a051d3e753db379731a8d9705
DIFF: https://github.com/llvm/llvm-project/commit/3e798476de466e8a051d3e753db379731a8d9705.diff
LOG: [LegalizeDAG][RISCV] Don't promote f16 vector ISD::FNEG/FABS/FCOPYSIGN to f32 when we don't have Zvfh. (#106652)
The fp_extend will canonicalize NaNs, which does not match the
semantics of FNEG/FABS/FCOPYSIGN.
For fixed vectors I'm scalarizing due to test changes on other targets
where the scalarization is expected. I will try to address this in a
follow-up.
For scalable vectors, we bitcast to integer and use integer logic ops.
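For reference, a minimal scalar sketch (plain C++, not LLVM code; the
helper names are hypothetical) of the per-element bit manipulation the
integer-logic expansion performs on an IEEE binary16 value. Operating on
the raw 16-bit pattern leaves NaN payloads untouched, unlike the old
promote-to-f32-and-round sequence:

#include <cstdint>

// Hypothetical helpers illustrating the integer-logic expansion for one
// f16 element; the vector legalizer does the same lane-wise on the
// bitcast integer vector.
static uint16_t fneg_f16(uint16_t X) { return X ^ 0x8000; }  // XOR the sign bit
static uint16_t fabs_f16(uint16_t X) { return X & 0x7FFF; }  // clear the sign bit
static uint16_t fcopysign_f16(uint16_t Mag, uint16_t Sign) {
  return (Mag & 0x7FFF) | (Sign & 0x8000);                   // splice in the sign bit
}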
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll
llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 297c349ae4e2f4..29dae4e27c7689 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -142,6 +142,8 @@ class VectorLegalizer {
std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
SDValue ExpandStore(SDNode *N);
SDValue ExpandFNEG(SDNode *Node);
+ SDValue ExpandFABS(SDNode *Node);
+ SDValue ExpandFCOPYSIGN(SDNode *Node);
void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results);
@@ -942,6 +944,18 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
+ case ISD::FABS:
+ if (SDValue Expanded = ExpandFABS(Node)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
+ case ISD::FCOPYSIGN:
+ if (SDValue Expanded = ExpandFCOPYSIGN(Node)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::FSUB:
ExpandFSUB(Node, Results);
return;
@@ -1781,7 +1795,7 @@ SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
// FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
if (!TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) ||
- !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
+ !(TLI.isOperationLegalOrCustom(ISD::FSUB, VT) || VT.isScalableVector()))
return SDValue();
SDLoc DL(Node);
@@ -1792,6 +1806,53 @@ SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
return DAG.getNode(ISD::BITCAST, DL, VT, Xor);
}
+SDValue VectorLegalizer::ExpandFABS(SDNode *Node) {
+ EVT VT = Node->getValueType(0);
+ EVT IntVT = VT.changeVectorElementTypeToInteger();
+
+ // FIXME: We shouldn't restrict this to scalable vectors.
+ if (!TLI.isOperationLegalOrCustom(ISD::AND, IntVT) || !VT.isScalableVector())
+ return SDValue();
+
+ SDLoc DL(Node);
+ SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
+ SDValue ClearSignMask = DAG.getConstant(
+ APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
+ SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Cast, ClearSignMask);
+ return DAG.getNode(ISD::BITCAST, DL, VT, ClearedSign);
+}
+
+SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
+ EVT VT = Node->getValueType(0);
+ EVT IntVT = VT.changeVectorElementTypeToInteger();
+
+ // FIXME: We shouldn't restrict this to scalable vectors.
+ if (VT != Node->getOperand(1).getValueType() ||
+ !TLI.isOperationLegalOrCustom(ISD::AND, IntVT) ||
+ !TLI.isOperationLegalOrCustom(ISD::OR, IntVT) || !VT.isScalableVector())
+ return SDValue();
+
+ SDLoc DL(Node);
+ SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
+ SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));
+
+ SDValue SignMask = DAG.getConstant(
+ APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
+ SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, Sign, SignMask);
+
+ SDValue ClearSignMask = DAG.getConstant(
+ APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
+ SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Mag, ClearSignMask);
+
+ SDNodeFlags Flags;
+ Flags.setDisjoint(true);
+
+ SDValue CopiedSign =
+ DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit, Flags);
+
+ return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
+}
+
void VectorLegalizer::ExpandFSUB(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
// For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 3742b897ca568c..5089bbbe3c0d7c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -934,13 +934,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// TODO: support more ops.
static const unsigned ZvfhminPromoteOps[] = {
- ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
- ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
- ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
- ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
- ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM,
- ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
- ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};
+ ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
+ ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
+ ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN,
+ ISD::FRINT, ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC,
+ ISD::FMAXIMUM, ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB,
+ ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};
// TODO: support more vp ops.
static const unsigned ZvfhminPromoteVPOps[] = {ISD::VP_FADD,
@@ -1082,6 +1081,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// load/store
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+
// Custom split nxv32f16 since nxv32f32 if not legal.
if (VT == MVT::nxv32f16) {
setOperationAction(ZvfhminPromoteOps, VT, Custom);
@@ -1337,6 +1340,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// available.
setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom);
}
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, VT, Expand);
MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
// Don't promote f16 vector operations to f32 if f32 vector type is
// not legal.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index 18431575732573..56cd718536daa4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -516,14 +516,50 @@ define void @fneg_v8f16(ptr %x) {
;
; ZVFHMIN-LABEL: fneg_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: mv a1, sp
+; ZVFHMIN-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-NEXT: flh fa3, 4(sp)
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-NEXT: lui a3, 1048568
+; ZVFHMIN-NEXT: fmv.x.h a4, fa3
+; ZVFHMIN-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-NEXT: lui a5, 8
+; ZVFHMIN-NEXT: xor a2, a2, a5
+; ZVFHMIN-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-NEXT: xor a1, a1, a3
+; ZVFHMIN-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-NEXT: xor a4, a4, a3
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-NEXT: xor a2, a2, a3
+; ZVFHMIN-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-NEXT: xor a1, a1, a3
+; ZVFHMIN-NEXT: xor a2, a2, a5
+; ZVFHMIN-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-NEXT: xor a2, a2, a3
+; ZVFHMIN-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: xor a1, a1, a3
+; ZVFHMIN-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4, v0.t
; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = fneg <8 x half> %a
@@ -542,35 +578,112 @@ define void @fneg_v6f16(ptr %x) {
;
; ZVFHMIN-RV32-LABEL: fneg_v6f16:
; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT: addi sp, sp, -16
+; ZVFHMIN-RV32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfneg.v v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-RV32-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-RV32-NEXT: flh fa3, 4(sp)
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-RV32-NEXT: lui a3, 1048568
+; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa3
+; ZVFHMIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-RV32-NEXT: xor a1, a1, a3
+; ZVFHMIN-RV32-NEXT: xor a2, a2, a3
+; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-RV32-NEXT: xor a4, a4, a3
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV32-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-RV32-NEXT: xor a5, a5, a3
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a5
+; ZVFHMIN-RV32-NEXT: xor a6, a6, a3
+; ZVFHMIN-RV32-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-RV32-NEXT: lui t0, 8
+; ZVFHMIN-RV32-NEXT: xor a7, a7, t0
+; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a7
+; ZVFHMIN-RV32-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a6
+; ZVFHMIN-RV32-NEXT: xor a6, a7, a3
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a6
+; ZVFHMIN-RV32-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-RV32-NEXT: xor a3, a6, a3
+; ZVFHMIN-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a3
+; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a5
+; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT: addi a0, a0, 8
+; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-RV32-NEXT: addi sp, sp, 16
; ZVFHMIN-RV32-NEXT: ret
;
; ZVFHMIN-RV64-LABEL: fneg_v6f16:
; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT: addi sp, sp, -16
+; ZVFHMIN-RV64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfneg.v v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-RV64-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-RV64-NEXT: flh fa3, 4(sp)
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-RV64-NEXT: lui a3, 1048568
+; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa3
+; ZVFHMIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-RV64-NEXT: lui a5, 8
+; ZVFHMIN-RV64-NEXT: xor a2, a2, a5
+; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-RV64-NEXT: xor a1, a1, a3
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-RV64-NEXT: xor a4, a4, a3
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV64-NEXT: xor a2, a2, a3
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-RV64-NEXT: xor a1, a1, a3
+; ZVFHMIN-RV64-NEXT: xor a2, a2, a5
+; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-RV64-NEXT: xor a2, a2, a3
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: xor a1, a1, a3
+; ZVFHMIN-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT: addi sp, sp, 16
; ZVFHMIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
%b = fneg <6 x half> %a
@@ -623,17 +736,101 @@ define void @fabs_v8f16(ptr %x) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-LABEL: fabs_v8f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-NEXT: ret
+; ZVFHMIN-RV32-LABEL: fabs_v8f16:
+; ZVFHMIN-RV32: # %bb.0:
+; ZVFHMIN-RV32-NEXT: addi sp, sp, -16
+; ZVFHMIN-RV32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-RV32-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-RV32-NEXT: flh fa3, 4(sp)
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-RV32-NEXT: lui a3, 8
+; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa3
+; ZVFHMIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-RV32-NEXT: addi a3, a3, -1
+; ZVFHMIN-RV32-NEXT: and a2, a2, a3
+; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-RV32-NEXT: and a1, a1, a3
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-RV32-NEXT: and a4, a4, a3
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV32-NEXT: and a2, a2, a3
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-RV32-NEXT: and a1, a1, a3
+; ZVFHMIN-RV32-NEXT: and a2, a2, a3
+; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-RV32-NEXT: and a2, a2, a3
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV32-NEXT: and a1, a1, a3
+; ZVFHMIN-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT: addi sp, sp, 16
+; ZVFHMIN-RV32-NEXT: ret
+;
+; ZVFHMIN-RV64-LABEL: fabs_v8f16:
+; ZVFHMIN-RV64: # %bb.0:
+; ZVFHMIN-RV64-NEXT: addi sp, sp, -16
+; ZVFHMIN-RV64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-RV64-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-RV64-NEXT: flh fa3, 4(sp)
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-RV64-NEXT: lui a3, 8
+; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa3
+; ZVFHMIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-RV64-NEXT: addiw a3, a3, -1
+; ZVFHMIN-RV64-NEXT: and a2, a2, a3
+; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-RV64-NEXT: and a1, a1, a3
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-RV64-NEXT: and a4, a4, a3
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV64-NEXT: and a2, a2, a3
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-RV64-NEXT: and a1, a1, a3
+; ZVFHMIN-RV64-NEXT: and a2, a2, a3
+; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-RV64-NEXT: and a2, a2, a3
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: and a1, a1, a3
+; ZVFHMIN-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-RV64-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-RV64-NEXT: addi sp, sp, 16
+; ZVFHMIN-RV64-NEXT: ret
%a = load <8 x half>, ptr %x
%b = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
store <8 x half> %b, ptr %x
@@ -652,35 +849,112 @@ define void @fabs_v6f16(ptr %x) {
;
; ZVFHMIN-RV32-LABEL: fabs_v6f16:
; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT: addi sp, sp, -16
+; ZVFHMIN-RV32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfabs.v v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-RV32-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-RV32-NEXT: flh fa3, 4(sp)
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-RV32-NEXT: lui a3, 8
+; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa3
+; ZVFHMIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-RV32-NEXT: addi a3, a3, -1
+; ZVFHMIN-RV32-NEXT: and a1, a1, a3
+; ZVFHMIN-RV32-NEXT: and a2, a2, a3
+; ZVFHMIN-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-RV32-NEXT: and a4, a4, a3
+; ZVFHMIN-RV32-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV32-NEXT: and a5, a5, a3
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a5
+; ZVFHMIN-RV32-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-RV32-NEXT: and a6, a6, a3
+; ZVFHMIN-RV32-NEXT: and a7, a7, a3
+; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a7
+; ZVFHMIN-RV32-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a6
+; ZVFHMIN-RV32-NEXT: and a6, a7, a3
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a6
+; ZVFHMIN-RV32-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-RV32-NEXT: and a3, a6, a3
+; ZVFHMIN-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a3
+; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a5
+; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT: addi a0, a0, 8
+; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-RV32-NEXT: addi sp, sp, 16
; ZVFHMIN-RV32-NEXT: ret
;
; ZVFHMIN-RV64-LABEL: fabs_v6f16:
; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT: addi sp, sp, -16
+; ZVFHMIN-RV64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfabs.v v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-RV64-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-RV64-NEXT: flh fa3, 4(sp)
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-RV64-NEXT: lui a3, 8
+; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa3
+; ZVFHMIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-RV64-NEXT: addiw a3, a3, -1
+; ZVFHMIN-RV64-NEXT: and a2, a2, a3
+; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-RV64-NEXT: and a1, a1, a3
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-RV64-NEXT: and a4, a4, a3
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV64-NEXT: and a2, a2, a3
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-RV64-NEXT: and a1, a1, a3
+; ZVFHMIN-RV64-NEXT: and a2, a2, a3
+; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-RV64-NEXT: and a2, a2, a3
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: and a1, a1, a3
+; ZVFHMIN-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT: addi sp, sp, 16
; ZVFHMIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
%b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a)
@@ -737,19 +1011,287 @@ define void @copysign_v8f16(ptr %x, ptr %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-LABEL: copysign_v8f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v8, v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-NEXT: ret
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_v8f16:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa2, fa3
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa1, fa2
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_v8f16:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa2, fa3
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa1, fa2
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_v8f16:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 1048568
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lui t1, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, t1, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, t1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, t0, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, t2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a6, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, t1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a5, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a3, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_v8f16:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 1048568
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lui t1, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, t1, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, t1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, t0, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, t2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a6, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, t1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a5, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a3, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <8 x half>, ptr %x
%b = load <8 x half>, ptr %y
%c = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
@@ -768,58 +1310,331 @@ define void @copysign_v6f16(ptr %x, ptr %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: copysign_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfsgnj.vv v8, v8, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_v6f16:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa2, fa3
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa1, fa2
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a6
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a6
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-ZFH-RV32-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT: ret
;
-; ZVFHMIN-RV64-LABEL: copysign_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfsgnj.vv v8, v8, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
- %a = load <6 x half>, ptr %x
- %b = load <6 x half>, ptr %y
- %c = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %b)
- store <6 x half> %c, ptr %x
- ret void
-}
-declare <6 x half> @llvm.copysign.v6f16(<6 x half>, <6 x half>)
-
-define void @copysign_v4f32(ptr %x, ptr %y) {
-; ZVFH-LABEL: copysign_v4f32:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; ZVFH-NEXT: vle32.v v8, (a0)
-; ZVFH-NEXT: vle32.v v9, (a1)
-; ZVFH-NEXT: vfsgnj.vv v8, v8, v9
-; ZVFH-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_v6f16:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa2, fa3
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa1, fa2
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vse64.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-ZFH-RV64-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFH-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_v6f16:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lui t1, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a3, t1, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, t0, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a7, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, t2, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a6, a6, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, t0, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t0, t2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a7, t0, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t0, t2, t1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t1, t1, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: or t0, t1, t0
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, t0
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, t1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t0, t0, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a7, t0, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, t1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a7, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a3, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_v6f16:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 1048568
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lui t1, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, t1, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, t1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, t0, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, t2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a6, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, t1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a5, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a3, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse64.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
+ %a = load <6 x half>, ptr %x
+ %b = load <6 x half>, ptr %y
+ %c = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %b)
+ store <6 x half> %c, ptr %x
+ ret void
+}
+declare <6 x half> @llvm.copysign.v6f16(<6 x half>, <6 x half>)
+
+define void @copysign_v4f32(ptr %x, ptr %y) {
+; ZVFH-LABEL: copysign_v4f32:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVFH-NEXT: vle32.v v8, (a0)
+; ZVFH-NEXT: vle32.v v9, (a1)
+; ZVFH-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFH-NEXT: vse32.v v8, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: copysign_v4f32:
@@ -864,20 +1679,215 @@ define void @copysign_vf_v8f16(ptr %x, half %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-LABEL: copysign_vf_v8f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-NEXT: ret
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_vf_v8f16:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa4, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa4, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a4, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_vf_v8f16:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa4, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa4, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a4, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_vf_v8f16:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a3, a3, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_vf_v8f16:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a3, a3, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <8 x half>, ptr %x
%b = insertelement <8 x half> poison, half %y, i32 0
%c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
@@ -895,52 +1905,247 @@ define void @copysign_vf_v6f16(ptr %x, half %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: copysign_vf_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT: li a2, 192
-; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
-; ZVFHMIN-RV32-NEXT: vmerge.vxm v9, v9, a1, v0
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfsgnj.vv v8, v9, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_vf_v6f16:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa4, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa4, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a4, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a6
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-ZFH-RV32-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: ret
;
-; ZVFHMIN-RV64-LABEL: copysign_vf_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT: li a2, 192
-; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
-; ZVFHMIN-RV64-NEXT: vmerge.vxm v9, v9, a1, v0
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfsgnj.vv v8, v9, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_vf_v6f16:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa4, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa4, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a4, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vse64.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-ZFH-RV64-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFH-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_vf_v6f16:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 1048568
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a1, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a5, a2, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a6, a6, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, a7, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a7, a7, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t0, t0, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: or t0, t0, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t1, t1, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, t1, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, t0
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_vf_v6f16:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: lui a4, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a4, a4, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse64.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
%b = insertelement <6 x half> poison, half %y, i32 0
%c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -999,24 +2204,303 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-LABEL: copysign_neg_v8f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v8, v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-NEXT: ret
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_neg_v8f16:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa0, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft0, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft1, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft2, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft3, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h ft1, ft2, ft1
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, ft1
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h ft0, ft3, ft0
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, ft0
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft0, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft1, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa0, ft0, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa1, ft1, fa1
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa0, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa1
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa2, fa0, fa2
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa2
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa3, fa1, fa3
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa3
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_neg_v8f16:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa0, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft0, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft1, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft2, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft3, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h ft1, ft2, ft1
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, ft1
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h ft0, ft3, ft0
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, ft0
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft0, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft1, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa0, ft0, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa1, ft1, fa1
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa0, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa1
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa2, fa0, fa2
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa2
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa3, fa1, fa3
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa3
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_neg_v8f16:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: not a1, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: not a2, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: not a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: not t1, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: not a6, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a5, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a5)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: not a7, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: not t0, t0
+; ZVFHMIN-ZFHIN-RV32-NEXT: not t2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: lui t3, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a3, t3, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t4, a5, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 1048568
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t2, t2, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: or t2, t4, t2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t4, t4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t0, t0, t3
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: or t0, t4, t0
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, t0
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, t2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t0, t0, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, a7, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a7, t0, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, a7, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a6, a7, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, t1, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a6, a6, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, a7, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, t3
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a7, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a4, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_neg_v8f16:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a1, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a2, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: not t1, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a6, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a5, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a5)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a7, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT: not t0, t0
+; ZVFHMIN-ZFHIN-RV64-NEXT: not t2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: lui t3, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a3, t3, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and t4, a5, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 1048568
+; ZVFHMIN-ZFHIN-RV64-NEXT: and t2, t2, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: or t2, t4, t2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: and t4, t4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: and t0, t0, t3
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: or t0, t4, t0
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, t0
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, t2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: and t0, t0, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, a7, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a7, t0, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, a7, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a6, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a6, a7, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a6, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, t1, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a6, a6, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, a7, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, t3
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a7, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a4, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <8 x half>, ptr %x
%b = load <8 x half>, ptr %y
%c = fneg <8 x half> %b
@@ -1035,52 +2519,331 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: copysign_neg_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfneg.v v8, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfsgnj.vv v8, v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_neg_v6f16:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa0, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft0, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft1, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft2, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft3, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h ft1, ft2, ft1
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, ft1
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h ft0, ft3, ft0
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, ft0
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft0, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT: flh ft1, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa0, ft0, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa1, ft1, fa1
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa0, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a4, fa1
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa2, fa0, fa2
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa2
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa3, fa1, fa3
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a6, fa3
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a6
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-ZFH-RV32-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT: ret
;
-; ZVFHMIN-RV64-LABEL: copysign_neg_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfneg.v v8, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfsgnj.vv v8, v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_neg_v6f16:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa0, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft0, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft1, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft2, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft3, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h ft1, ft2, ft1
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, ft1
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h ft0, ft3, ft0
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, ft0
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft0, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT: flh ft1, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa0, ft0, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa1, ft1, fa1
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa0, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa1
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa2, fa0, fa2
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa2
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa3, fa1, fa3
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa3
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vse64.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-ZFH-RV64-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFH-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_neg_v6f16:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: not a1, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: not a2, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: not a6, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: not t1, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: not t2, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a4, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a4)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: not t3, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: not a5, t0
+; ZVFHMIN-ZFHIN-RV32-NEXT: not a3, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: lui t0, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a4, t0, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t4, a7, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lui a7, 1048568
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, t4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t4, t4, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, t4, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t4, t4, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t3, t3, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: or t3, t4, t3
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, t3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t4, t4, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t2, t2, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: or t2, t4, t2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, t2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t4, t4, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t1, t1, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: or t1, t4, t1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t4, t4, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, t0
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a6, t4, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, t1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a6, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, t3
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, t2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_neg_v6f16:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a1, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a2, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: not t1, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a6, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a5, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a5)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a7, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT: not t0, t0
+; ZVFHMIN-ZFHIN-RV64-NEXT: not t2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: lui t3, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a3, t3, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and t4, a5, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 1048568
+; ZVFHMIN-ZFHIN-RV64-NEXT: and t2, t2, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: or t2, t4, t2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: and t4, t4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: and t0, t0, t3
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: or t0, t4, t0
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, t0
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, t2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: and t0, t0, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, a7, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a7, t0, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, a7, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a6, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a6, a7, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a6, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, t1, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a6, a6, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, a7, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, t3
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a7, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a4, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse64.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a0, a0, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = fneg <6 x half> %b
@@ -1143,25 +2906,187 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-LABEL: copysign_neg_trunc_v4f16_v4f32:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: vle32.v v9, (a1)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-NEXT: ret
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_neg_trunc_v4f16_v4f32:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: vle32.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 8
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_neg_trunc_v4f16_v4f32:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: vle32.v v9, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 8
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_neg_trunc_v4f16_v4f32:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle32.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a4, a2, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: not a3, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lui a6, 1048568
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: not a3, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a3, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a5, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: not a1, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: not a1, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_neg_trunc_v4f16_v4f32:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle32.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a4, a2, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a3, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: lui a6, 1048568
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a3, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a3, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a5, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a1, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a1, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <4 x half>, ptr %x
%b = load <4 x float>, ptr %y
%c = fneg <4 x float> %b
@@ -1185,65 +3110,215 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: addi sp, sp, -16
-; ZVFHMIN-RV32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 3, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle32.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfneg.v v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfsgnj.vv v8, v10, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: addi a1, sp, 8
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp)
-; ZVFHMIN-RV32-NEXT: fsh fa5, 4(a0)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vse32.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: addi sp, sp, 16
-; ZVFHMIN-RV32-NEXT: ret
+; ZVFHMIN-ZFH-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32:
+; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 3, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vle32.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 8
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 24
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 4(a0)
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT: ret
;
-; ZVFHMIN-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: addi sp, sp, -16
-; ZVFHMIN-RV64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vle64.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 3, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle32.v v9, (a1)
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfneg.v v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfsgnj.vv v8, v10, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: addi a1, sp, 8
-; ZVFHMIN-RV64-NEXT: vse16.v v9, (a1)
-; ZVFHMIN-RV64-NEXT: flh fa5, 12(sp)
-; ZVFHMIN-RV64-NEXT: fsh fa5, 4(a0)
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse32.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: addi sp, sp, 16
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-ZFH-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32:
+; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 3, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle32.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle64.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v10, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 8
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa1, fa2
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 24
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 4(a0)
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32:
+; ZVFHMIN-ZFHIN-RV32: # %bb.0:
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 3, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vle32.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a4, a2, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: not a3, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lui a6, 1048568
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: not a3, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a3, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a5, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: not a1, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: not a1, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 24
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 4(a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFHIN-RV32-NEXT: ret
+;
+; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32:
+; ZVFHMIN-ZFHIN-RV64: # %bb.0:
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 3, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle32.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle64.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a1, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: lui a4, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a5, a4, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a3, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a1, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a1, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 24
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 4(a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <3 x half>, ptr %x
%b = load <3 x float>, ptr %y
%c = fneg <3 x float> %b
@@ -1543,23 +3618,59 @@ define void @fmsub_v8f16(ptr %x, ptr %y, ptr %z) {
;
; ZVFHMIN-LABEL: fmsub_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
; ZVFHMIN-NEXT: vle16.v v8, (a2)
; ZVFHMIN-NEXT: vle16.v v9, (a0)
; ZVFHMIN-NEXT: vle16.v v10, (a1)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v11
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT: mv a1, sp
+; ZVFHMIN-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-NEXT: flh fa3, 4(sp)
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-NEXT: lui a3, 1048568
+; ZVFHMIN-NEXT: fmv.x.h a4, fa3
+; ZVFHMIN-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-NEXT: lui a5, 8
+; ZVFHMIN-NEXT: xor a2, a2, a5
+; ZVFHMIN-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-NEXT: xor a1, a1, a3
+; ZVFHMIN-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-NEXT: xor a4, a4, a3
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-NEXT: xor a2, a2, a3
+; ZVFHMIN-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-NEXT: xor a1, a1, a3
+; ZVFHMIN-NEXT: xor a2, a2, a5
+; ZVFHMIN-NEXT: vmv.v.x v11, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-NEXT: vslide1down.vx v11, v11, a1
+; ZVFHMIN-NEXT: xor a2, a2, a3
+; ZVFHMIN-NEXT: vslide1down.vx v11, v11, a2
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: xor a1, a1, a3
+; ZVFHMIN-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-NEXT: vslide1down.vx v11, v11, a1
+; ZVFHMIN-NEXT: vslidedown.vi v11, v8, 4, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9
+; ZVFHMIN-NEXT: vfmadd.vv v9, v11, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = load <8 x half>, ptr %y
@@ -1583,53 +3694,125 @@ define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) {
;
; ZVFHMIN-RV32-LABEL: fmsub_v6f16:
; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV32-NEXT: addi sp, sp, -16
+; ZVFHMIN-RV32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
; ZVFHMIN-RV32-NEXT: vle16.v v8, (a2)
; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0)
; ZVFHMIN-RV32-NEXT: vle16.v v10, (a1)
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfneg.v v8, v11
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v11, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-RV32-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-RV32-NEXT: flh fa3, 4(sp)
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-RV32-NEXT: lui a3, 1048568
+; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa3
+; ZVFHMIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-RV32-NEXT: lui a5, 8
+; ZVFHMIN-RV32-NEXT: xor a2, a2, a5
+; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-RV32-NEXT: xor a1, a1, a3
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-RV32-NEXT: xor a4, a4, a3
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV32-NEXT: xor a2, a2, a3
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-RV32-NEXT: xor a1, a1, a3
+; ZVFHMIN-RV32-NEXT: xor a2, a2, a5
+; ZVFHMIN-RV32-NEXT: vmv.v.x v11, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v11, v11, a1
+; ZVFHMIN-RV32-NEXT: xor a2, a2, a3
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v11, v11, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV32-NEXT: xor a1, a1, a3
+; ZVFHMIN-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v11, v11, a1
+; ZVFHMIN-RV32-NEXT: vslidedown.vi v11, v8, 4, v0.t
+; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v9
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v11
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmacc.vv v10, v8, v9
+; ZVFHMIN-RV32-NEXT: vfmadd.vv v9, v11, v8
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 2
; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1)
; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT: addi sp, sp, 16
; ZVFHMIN-RV32-NEXT: ret
;
; ZVFHMIN-RV64-LABEL: fmsub_v6f16:
; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-RV64-NEXT: addi sp, sp, -16
+; ZVFHMIN-RV64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
; ZVFHMIN-RV64-NEXT: vle16.v v8, (a2)
; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0)
; ZVFHMIN-RV64-NEXT: vle16.v v10, (a1)
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfneg.v v8, v11
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v11, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-RV64-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-RV64-NEXT: flh fa3, 4(sp)
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-RV64-NEXT: lui a3, 1048568
+; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa3
+; ZVFHMIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-RV64-NEXT: lui a5, 8
+; ZVFHMIN-RV64-NEXT: xor a2, a2, a5
+; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-RV64-NEXT: xor a1, a1, a3
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-RV64-NEXT: xor a4, a4, a3
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV64-NEXT: xor a2, a2, a3
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-RV64-NEXT: xor a1, a1, a3
+; ZVFHMIN-RV64-NEXT: xor a2, a2, a5
+; ZVFHMIN-RV64-NEXT: vmv.v.x v11, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v11, v11, a1
+; ZVFHMIN-RV64-NEXT: xor a2, a2, a3
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v11, v11, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV64-NEXT: xor a1, a1, a3
+; ZVFHMIN-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v11, v11, a1
+; ZVFHMIN-RV64-NEXT: vslidedown.vi v11, v8, 4, v0.t
+; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v9
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v11
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmacc.vv v10, v8, v9
+; ZVFHMIN-RV64-NEXT: vfmadd.vv v9, v11, v8
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0)
; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2
; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT: addi sp, sp, 16
; ZVFHMIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
@@ -2018,17 +4201,187 @@ define void @fneg_v16f16(ptr %x) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-LABEL: fneg_v16f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT: vse16.v v10, (a0)
-; ZVFHMIN-NEXT: ret
+; ZVFHMIN-RV32-LABEL: fneg_v16f16:
+; ZVFHMIN-RV32: # %bb.0:
+; ZVFHMIN-RV32-NEXT: addi sp, sp, -64
+; ZVFHMIN-RV32-NEXT: .cfi_def_cfa_offset 64
+; ZVFHMIN-RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
+; ZVFHMIN-RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
+; ZVFHMIN-RV32-NEXT: .cfi_offset ra, -4
+; ZVFHMIN-RV32-NEXT: .cfi_offset s0, -8
+; ZVFHMIN-RV32-NEXT: addi s0, sp, 64
+; ZVFHMIN-RV32-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN-RV32-NEXT: andi sp, sp, -32
+; ZVFHMIN-RV32-NEXT: vsetivli zero, 16, e16, m1, ta, mu
+; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-RV32-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-RV32-NEXT: flh fa3, 6(sp)
+; ZVFHMIN-RV32-NEXT: fmv.x.h a3, fa4
+; ZVFHMIN-RV32-NEXT: flh fa4, 8(sp)
+; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-RV32-NEXT: fmv.x.h a5, fa3
+; ZVFHMIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-RV32-NEXT: fmv.x.h a6, fa4
+; ZVFHMIN-RV32-NEXT: lui a1, 1048568
+; ZVFHMIN-RV32-NEXT: xor a2, a2, a1
+; ZVFHMIN-RV32-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-RV32-NEXT: lui t0, 8
+; ZVFHMIN-RV32-NEXT: xor a3, a3, t0
+; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a3
+; ZVFHMIN-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-RV32-NEXT: xor a4, a4, a1
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-RV32-NEXT: xor a5, a5, a1
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a5
+; ZVFHMIN-RV32-NEXT: xor a4, a6, a1
+; ZVFHMIN-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV32-NEXT: xor a4, a7, a1
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-RV32-NEXT: xor a3, a3, a1
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-RV32-NEXT: xor a2, a2, a1
+; ZVFHMIN-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-RV32-NEXT: xor a2, a4, t0
+; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-RV32-NEXT: xor a5, a5, a1
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-RV32-NEXT: xor a3, a3, a1
+; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a3
+; ZVFHMIN-RV32-NEXT: xor a2, a2, a1
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-RV32-NEXT: xor a4, a4, a1
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a4
+; ZVFHMIN-RV32-NEXT: xor a2, a2, a1
+; ZVFHMIN-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-RV32-NEXT: xor a3, a3, a1
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a3
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV32-NEXT: xor a1, a2, a1
+; ZVFHMIN-RV32-NEXT: li a2, 255
+; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 8, v0.t
+; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT: addi sp, s0, -64
+; ZVFHMIN-RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
+; ZVFHMIN-RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
+; ZVFHMIN-RV32-NEXT: addi sp, sp, 64
+; ZVFHMIN-RV32-NEXT: ret
+;
+; ZVFHMIN-RV64-LABEL: fneg_v16f16:
+; ZVFHMIN-RV64: # %bb.0:
+; ZVFHMIN-RV64-NEXT: addi sp, sp, -64
+; ZVFHMIN-RV64-NEXT: .cfi_def_cfa_offset 64
+; ZVFHMIN-RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; ZVFHMIN-RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; ZVFHMIN-RV64-NEXT: .cfi_offset ra, -8
+; ZVFHMIN-RV64-NEXT: .cfi_offset s0, -16
+; ZVFHMIN-RV64-NEXT: addi s0, sp, 64
+; ZVFHMIN-RV64-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN-RV64-NEXT: andi sp, sp, -32
+; ZVFHMIN-RV64-NEXT: vsetivli zero, 16, e16, m1, ta, mu
+; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-RV64-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-RV64-NEXT: flh fa3, 6(sp)
+; ZVFHMIN-RV64-NEXT: fmv.x.h a3, fa4
+; ZVFHMIN-RV64-NEXT: flh fa4, 8(sp)
+; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-RV64-NEXT: fmv.x.h a5, fa3
+; ZVFHMIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-RV64-NEXT: fmv.x.h a6, fa4
+; ZVFHMIN-RV64-NEXT: lui a1, 1048568
+; ZVFHMIN-RV64-NEXT: xor a2, a2, a1
+; ZVFHMIN-RV64-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-RV64-NEXT: lui t0, 8
+; ZVFHMIN-RV64-NEXT: xor a3, a3, t0
+; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a3
+; ZVFHMIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-RV64-NEXT: xor a4, a4, a1
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-RV64-NEXT: xor a5, a5, a1
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a5
+; ZVFHMIN-RV64-NEXT: xor a4, a6, a1
+; ZVFHMIN-RV64-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV64-NEXT: xor a4, a7, a1
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-RV64-NEXT: xor a3, a3, a1
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-RV64-NEXT: xor a2, a2, a1
+; ZVFHMIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-RV64-NEXT: xor a2, a4, t0
+; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 24(sp)
+; ZVFHMIN-RV64-NEXT: xor a5, a5, a1
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-RV64-NEXT: xor a3, a3, a1
+; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 26(sp)
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a3
+; ZVFHMIN-RV64-NEXT: xor a2, a2, a1
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-RV64-NEXT: xor a4, a4, a1
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a4
+; ZVFHMIN-RV64-NEXT: xor a2, a2, a1
+; ZVFHMIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 30(sp)
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-RV64-NEXT: xor a3, a3, a1
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a3
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT: xor a1, a2, a1
+; ZVFHMIN-RV64-NEXT: li a2, 255
+; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-RV64-NEXT: vslidedown.vi v9, v8, 8, v0.t
+; ZVFHMIN-RV64-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-RV64-NEXT: addi sp, s0, -64
+; ZVFHMIN-RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; ZVFHMIN-RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; ZVFHMIN-RV64-NEXT: addi sp, sp, 64
+; ZVFHMIN-RV64-NEXT: ret
%a = load <16 x half>, ptr %x
%b = fneg <16 x half> %a
store <16 x half> %b, ptr %x
@@ -3554,24 +5907,60 @@ define void @fmsub_vf_v8f16(ptr %x, ptr %y, half %z) {
;
; ZVFHMIN-LABEL: fmsub_vf_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-NEXT: vle16.v v9, (a1)
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: mv a1, sp
+; ZVFHMIN-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-NEXT: flh fa4, 0(sp)
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
+; ZVFHMIN-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-NEXT: fmv.x.h a3, fa4
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: lui a1, 1048568
+; ZVFHMIN-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-NEXT: lui a5, 8
+; ZVFHMIN-NEXT: xor a3, a3, a5
+; ZVFHMIN-NEXT: vmv.v.x v10, a3
+; ZVFHMIN-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-NEXT: xor a2, a2, a1
+; ZVFHMIN-NEXT: vslide1down.vx v10, v10, a2
+; ZVFHMIN-NEXT: xor a4, a4, a1
+; ZVFHMIN-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-NEXT: vslide1down.vx v10, v10, a4
+; ZVFHMIN-NEXT: xor a3, a3, a1
+; ZVFHMIN-NEXT: vslide1down.vx v10, v10, a3
+; ZVFHMIN-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-NEXT: xor a2, a2, a1
+; ZVFHMIN-NEXT: xor a3, a3, a5
+; ZVFHMIN-NEXT: vmv.v.x v11, a3
+; ZVFHMIN-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-NEXT: vslide1down.vx v11, v11, a2
+; ZVFHMIN-NEXT: xor a3, a3, a1
+; ZVFHMIN-NEXT: vslide1down.vx v11, v11, a3
+; ZVFHMIN-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-NEXT: xor a1, a2, a1
+; ZVFHMIN-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-NEXT: vslide1down.vx v11, v11, a1
+; ZVFHMIN-NEXT: vslidedown.vi v11, v10, 4, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v11
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9
+; ZVFHMIN-NEXT: vfmadd.vv v8, v11, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = load <8 x half>, ptr %y
@@ -3595,63 +5984,135 @@ define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
;
; ZVFHMIN-RV32-LABEL: fmsub_vf_v6f16:
; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT: addi sp, sp, -16
+; ZVFHMIN-RV32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
; ZVFHMIN-RV32-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT: li a2, 192
-; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV32-NEXT: vmv.v.x v10, a2
-; ZVFHMIN-RV32-NEXT: vmerge.vxm v10, v10, a1, v0
+; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-RV32-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-RV32-NEXT: li a4, 192
+; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a4
+; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-RV32-NEXT: vmerge.vxm v9, v9, a3, v0
+; ZVFHMIN-RV32-NEXT: lui a1, 1048568
+; ZVFHMIN-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-RV32-NEXT: lui a5, 8
+; ZVFHMIN-RV32-NEXT: xor a4, a4, a5
+; ZVFHMIN-RV32-NEXT: vmv.v.x v10, a4
+; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-RV32-NEXT: xor a2, a2, a1
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v10, v10, a2
+; ZVFHMIN-RV32-NEXT: xor a3, a3, a1
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v10, v10, a3
+; ZVFHMIN-RV32-NEXT: xor a4, a4, a1
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v10, v10, a4
+; ZVFHMIN-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-RV32-NEXT: xor a2, a2, a1
+; ZVFHMIN-RV32-NEXT: xor a3, a3, a5
+; ZVFHMIN-RV32-NEXT: vmv.v.x v11, a3
+; ZVFHMIN-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v11, v11, a2
+; ZVFHMIN-RV32-NEXT: xor a3, a3, a1
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v11, v11, a3
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV32-NEXT: xor a1, a2, a1
+; ZVFHMIN-RV32-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v11, v11, a1
+; ZVFHMIN-RV32-NEXT: vslidedown.vi v11, v10, 4, v0.t
+; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v11
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v9
+; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfneg.v v9, v11
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v11, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v11
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmacc.vv v8, v9, v10
+; ZVFHMIN-RV32-NEXT: vfmadd.vv v9, v11, v10
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 2
; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
+; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1)
; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT: addi sp, sp, 16
; ZVFHMIN-RV32-NEXT: ret
;
; ZVFHMIN-RV64-LABEL: fmsub_vf_v6f16:
; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT: addi sp, sp, -16
+; ZVFHMIN-RV64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
; ZVFHMIN-RV64-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT: li a2, 192
-; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV64-NEXT: vmv.v.x v10, a2
-; ZVFHMIN-RV64-NEXT: vmerge.vxm v10, v10, a1, v0
+; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-RV64-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-RV64-NEXT: li a4, 192
+; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a4
+; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-RV64-NEXT: vmerge.vxm v9, v9, a3, v0
+; ZVFHMIN-RV64-NEXT: lui a1, 1048568
+; ZVFHMIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-RV64-NEXT: lui a5, 8
+; ZVFHMIN-RV64-NEXT: xor a4, a4, a5
+; ZVFHMIN-RV64-NEXT: vmv.v.x v10, a4
+; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-RV64-NEXT: xor a2, a2, a1
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v10, v10, a2
+; ZVFHMIN-RV64-NEXT: xor a3, a3, a1
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v10, v10, a3
+; ZVFHMIN-RV64-NEXT: xor a4, a4, a1
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v10, v10, a4
+; ZVFHMIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-RV64-NEXT: xor a2, a2, a1
+; ZVFHMIN-RV64-NEXT: xor a3, a3, a5
+; ZVFHMIN-RV64-NEXT: vmv.v.x v11, a3
+; ZVFHMIN-RV64-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v11, v11, a2
+; ZVFHMIN-RV64-NEXT: xor a3, a3, a1
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v11, v11, a3
+; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT: xor a1, a2, a1
+; ZVFHMIN-RV64-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-RV64-NEXT: vslide1down.vx v11, v11, a1
+; ZVFHMIN-RV64-NEXT: vslidedown.vi v11, v10, 4, v0.t
+; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v11
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v9
+; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfneg.v v9, v11
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v11, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v11
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmacc.vv v8, v9, v10
+; ZVFHMIN-RV64-NEXT: vfmadd.vv v9, v11, v10
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
+; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0)
+; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2
; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT: addi sp, sp, 16
; ZVFHMIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
index 95a410ea56b74a..4bf9ae16cdaf01 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
@@ -19,12 +19,10 @@ define <vscale x 1 x half> @vfabs_nxv1f16(<vscale x 1 x half> %v) {
;
; ZVFHMIN-LABEL: vfabs_nxv1f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v9, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%r = call <vscale x 1 x half> @llvm.fabs.nxv1f16(<vscale x 1 x half> %v)
ret <vscale x 1 x half> %r
@@ -41,12 +39,10 @@ define <vscale x 2 x half> @vfabs_nxv2f16(<vscale x 2 x half> %v) {
;
; ZVFHMIN-LABEL: vfabs_nxv2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v9, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%r = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> %v)
ret <vscale x 2 x half> %r
@@ -63,12 +59,10 @@ define <vscale x 4 x half> @vfabs_nxv4f16(<vscale x 4 x half> %v) {
;
; ZVFHMIN-LABEL: vfabs_nxv4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v10, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%r = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %v)
ret <vscale x 4 x half> %r
@@ -85,12 +79,10 @@ define <vscale x 8 x half> @vfabs_nxv8f16(<vscale x 8 x half> %v) {
;
; ZVFHMIN-LABEL: vfabs_nxv8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v12, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%r = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %v)
ret <vscale x 8 x half> %r
@@ -107,12 +99,10 @@ define <vscale x 16 x half> @vfabs_nxv16f16(<vscale x 16 x half> %v) {
;
; ZVFHMIN-LABEL: vfabs_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%r = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> %v)
ret <vscale x 16 x half> %r
@@ -129,17 +119,10 @@ define <vscale x 32 x half> @vfabs_nxv32f16(<vscale x 32 x half> %v) {
;
; ZVFHMIN-LABEL: vfabs_nxv32f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%r = call <vscale x 32 x half> @llvm.fabs.nxv32f16(<vscale x 32 x half> %v)
ret <vscale x 32 x half> %r
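(For reference, the ZVFHMIN checks above materialize 0x7fff via lui/addi and clear the sign bit with vand.vx. The same identity can be written directly as generic IR; this is a minimal illustrative sketch only — the function name and vector width are hypothetical and not part of the patch or its tests.)

define <vscale x 4 x half> @fabs_as_and_sketch(<vscale x 4 x half> %v) {
  ; Reinterpret the f16 lanes as i16 so the sign bit can be masked directly.
  %bits = bitcast <vscale x 4 x half> %v to <vscale x 4 x i16>
  ; Splat of 0x7fff: every bit except the per-lane sign bit.
  %head = insertelement <vscale x 4 x i16> poison, i16 32767, i32 0
  %mask = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
  ; Clearing bit 15 is fabs for each 16-bit payload; all other bits pass through unchanged.
  %abs = and <vscale x 4 x i16> %bits, %mask
  %res = bitcast <vscale x 4 x i16> %abs to <vscale x 4 x half>
  ret <vscale x 4 x half> %res
}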
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
index 029a121d08980c..c71c07488581a9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
@@ -19,13 +19,12 @@ define <vscale x 1 x half> @vfcopysign_vv_nxv1f16(<vscale x 1 x half> %vm, <vsca
;
; ZVFHMIN-LABEL: vfcopysign_vv_nxv1f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %vs)
ret <vscale x 1 x half> %r
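(The vand.vx/vand.vx/vor.vv sequence in the checks above is the usual bit-level copysign: magnitude bits from the first operand, sign bit from the second. A minimal sketch of that identity in IR, assuming hypothetical names — it is not generated by this patch:)

define <vscale x 1 x half> @copysign_as_logic_sketch(<vscale x 1 x half> %mag, <vscale x 1 x half> %sgn) {
  %m = bitcast <vscale x 1 x half> %mag to <vscale x 1 x i16>
  %s = bitcast <vscale x 1 x half> %sgn to <vscale x 1 x i16>
  ; Splat 0x7fff for the magnitude mask and 0x8000 for the sign mask.
  %h0 = insertelement <vscale x 1 x i16> poison, i16 32767, i32 0
  %magmask = shufflevector <vscale x 1 x i16> %h0, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
  %h1 = insertelement <vscale x 1 x i16> poison, i16 -32768, i32 0
  %sgnmask = shufflevector <vscale x 1 x i16> %h1, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
  ; result = (mag & 0x7fff) | (sgn & 0x8000), one 16-bit lane at a time.
  %lo = and <vscale x 1 x i16> %m, %magmask
  %hi = and <vscale x 1 x i16> %s, %sgnmask
  %or = or <vscale x 1 x i16> %lo, %hi
  %r = bitcast <vscale x 1 x i16> %or to <vscale x 1 x half>
  ret <vscale x 1 x half> %r
}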
@@ -45,12 +44,11 @@ define <vscale x 1 x half> @vfcopysign_vf_nxv1f16(<vscale x 1 x half> %vm, half
; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v9, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 1 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -67,18 +65,13 @@ define <vscale x 1 x half> @vfcopynsign_vv_nxv1f16(<vscale x 1 x half> %vm, <vsc
;
; ZVFHMIN-LABEL: vfcopynsign_vv_nxv1f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%n = fneg <vscale x 1 x half> %vs
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %n)
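(The copynsign checks above only add a vxor.vx against 0x8000 before the same masking, since negating an f16 is an xor of its sign bit. A short illustrative sketch under the same assumptions — hypothetical names, not taken from the patch:)

define <vscale x 1 x half> @copynsign_as_logic_sketch(<vscale x 1 x half> %mag, <vscale x 1 x half> %sgn) {
  %m = bitcast <vscale x 1 x half> %mag to <vscale x 1 x i16>
  %s = bitcast <vscale x 1 x half> %sgn to <vscale x 1 x i16>
  ; Splat of 0x8000: the per-lane sign bit.
  %h0 = insertelement <vscale x 1 x i16> poison, i16 -32768, i32 0
  %signbit = shufflevector <vscale x 1 x i16> %h0, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
  ; fneg(sgn) at the bit level: flip bit 15 of every lane.
  %neg = xor <vscale x 1 x i16> %s, %signbit
  ; Then the plain copysign masking: (mag & 0x7fff) | (neg & 0x8000).
  %h1 = insertelement <vscale x 1 x i16> poison, i16 32767, i32 0
  %magmask = shufflevector <vscale x 1 x i16> %h1, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
  %lo = and <vscale x 1 x i16> %m, %magmask
  %hi = and <vscale x 1 x i16> %neg, %signbit
  %or = or <vscale x 1 x i16> %lo, %hi
  %r = bitcast <vscale x 1 x i16> %or to <vscale x 1 x half>
  ret <vscale x 1 x half> %r
}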
@@ -99,17 +92,12 @@ define <vscale x 1 x half> @vfcopynsign_vf_nxv1f16(<vscale x 1 x half> %vm, half
; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v9, v10, a0
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 1 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -130,12 +118,11 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vv_nxv1f16_nxv1f32(<vscale x 1 x
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vand.vx v9, v10, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%e = fptrunc <vscale x 1 x float> %vs to <vscale x 1 x half>
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %e)
@@ -158,12 +145,11 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vf_nxv1f16_nxv1f32(<vscale x 1 x
; ZVFHMIN-NEXT: vfmv.v.f v9, fa0
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v9, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 1 x float> poison, float %s, i32 0
%splat = shufflevector <vscale x 1 x float> %head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
@@ -182,19 +168,14 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32(<vscale x 1
;
; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: vxor.vx v9, v10, a0
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%n = fneg <vscale x 1 x float> %vs
%eneg = fptrunc <vscale x 1 x float> %n to <vscale x 1 x half>
@@ -216,19 +197,14 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f32(<vscale x 1
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmv.v.f v9, fa0
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: vxor.vx v9, v10, a0
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 1 x float> poison, float %s, i32 0
%splat = shufflevector <vscale x 1 x float> %head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
@@ -254,12 +230,11 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vv_nxv1f16_nxv1f64(<vscale x 1 x
; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v10, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%e = fptrunc <vscale x 1 x double> %vs to <vscale x 1 x half>
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %e)
@@ -286,12 +261,11 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vf_nxv1f16_nxv1f64(<vscale x 1 x
; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v10, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 1 x double> poison, double %s, i32 0
%splat = shufflevector <vscale x 1 x double> %head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
@@ -312,22 +286,17 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64(<vscale x 1
;
; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v10, v8
+; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v10, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%n = fneg <vscale x 1 x double> %vs
%eneg = fptrunc <vscale x 1 x double> %n to <vscale x 1 x half>
@@ -351,22 +320,17 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f64(<vscale x 1
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; ZVFHMIN-NEXT: vfmv.v.f v9, fa0
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v10, v8
+; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v10, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 1 x double> poison, double %s, i32 0
%splat = shufflevector <vscale x 1 x double> %head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
@@ -387,13 +351,12 @@ define <vscale x 2 x half> @vfcopysign_vv_nxv2f16(<vscale x 2 x half> %vm, <vsca
;
; ZVFHMIN-LABEL: vfcopysign_vv_nxv2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%r = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> %vm, <vscale x 2 x half> %vs)
ret <vscale x 2 x half> %r
@@ -413,12 +376,11 @@ define <vscale x 2 x half> @vfcopysign_vf_nxv2f16(<vscale x 2 x half> %vm, half
; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v9, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 2 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -435,18 +397,13 @@ define <vscale x 2 x half> @vfcopynsign_vv_nxv2f16(<vscale x 2 x half> %vm, <vsc
;
; ZVFHMIN-LABEL: vfcopynsign_vv_nxv2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%n = fneg <vscale x 2 x half> %vs
%r = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> %vm, <vscale x 2 x half> %n)
@@ -467,17 +424,12 @@ define <vscale x 2 x half> @vfcopynsign_vf_nxv2f16(<vscale x 2 x half> %vm, half
; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v9, v10, a0
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 2 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -497,13 +449,12 @@ define <vscale x 4 x half> @vfcopysign_vv_nxv4f16(<vscale x 4 x half> %vm, <vsca
;
; ZVFHMIN-LABEL: vfcopysign_vv_nxv4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%r = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> %vm, <vscale x 4 x half> %vs)
ret <vscale x 4 x half> %r
@@ -523,12 +474,11 @@ define <vscale x 4 x half> @vfcopysign_vf_nxv4f16(<vscale x 4 x half> %vm, half
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v10, v10, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 4 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -545,18 +495,13 @@ define <vscale x 4 x half> @vfcopynsign_vv_nxv4f16(<vscale x 4 x half> %vm, <vsc
;
; ZVFHMIN-LABEL: vfcopynsign_vv_nxv4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v10, v10, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%n = fneg <vscale x 4 x half> %vs
%r = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> %vm, <vscale x 4 x half> %n)
@@ -577,17 +522,12 @@ define <vscale x 4 x half> @vfcopynsign_vf_nxv4f16(<vscale x 4 x half> %vm, half
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v10, v10, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 4 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -607,13 +547,12 @@ define <vscale x 8 x half> @vfcopysign_vv_nxv8f16(<vscale x 8 x half> %vm, <vsca
;
; ZVFHMIN-LABEL: vfcopysign_vv_nxv8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %vs)
ret <vscale x 8 x half> %r
@@ -633,12 +572,11 @@ define <vscale x 8 x half> @vfcopysign_vf_nxv8f16(<vscale x 8 x half> %vm, half
; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -655,18 +593,13 @@ define <vscale x 8 x half> @vfcopynsign_vv_nxv8f16(<vscale x 8 x half> %vm, <vsc
;
; ZVFHMIN-LABEL: vfcopynsign_vv_nxv8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%n = fneg <vscale x 8 x half> %vs
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %n)
@@ -687,17 +620,12 @@ define <vscale x 8 x half> @vfcopynsign_vf_nxv8f16(<vscale x 8 x half> %vm, half
; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -718,12 +646,11 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vv_nxv8f16_nxv8f32(<vscale x 8 x
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%e = fptrunc <vscale x 8 x float> %vs to <vscale x 8 x half>
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %e)
@@ -746,12 +673,11 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vf_nxv8f16_nxv8f32(<vscale x 8 x
; ZVFHMIN-NEXT: vfmv.v.f v12, fa0
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 8 x float> poison, float %s, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -770,19 +696,14 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vv_nxv8f16_nxv8f32(<vscale x 8
;
; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vv_nxv8f16_nxv8f32:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%n = fneg <vscale x 8 x float> %vs
%eneg = fptrunc <vscale x 8 x float> %n to <vscale x 8 x half>
@@ -804,19 +725,14 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vf_nxv8f16_nxv8f32(<vscale x 8
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfmv.v.f v12, fa0
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 8 x float> poison, float %s, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
@@ -842,12 +758,11 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vv_nxv8f16_nxv8f64(<vscale x 8 x
; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v12, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%e = fptrunc <vscale x 8 x double> %vs to <vscale x 8 x half>
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %e)
@@ -874,12 +789,11 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vf_nxv8f16_nxv8f64(<vscale x 8 x
; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v12, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 8 x double> poison, double %s, i32 0
%splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -900,22 +814,17 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vv_nxv8f16_nxv8f64(<vscale x 8
;
; ZVFHMIN-LABEL: vfcopynsign_exttrunc_vv_nxv8f16_nxv8f64:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v8, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v8
+; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v12, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%n = fneg <vscale x 8 x double> %vs
%eneg = fptrunc <vscale x 8 x double> %n to <vscale x 8 x half>
@@ -939,22 +848,17 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vf_nxv8f16_nxv8f64(<vscale x 8
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.v.f v16, fa0
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v8, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v12, v8
+; ZVFHMIN-NEXT: vfncvt.rod.f.f.w v12, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 8 x double> poison, double %s, i32 0
%splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -975,13 +879,12 @@ define <vscale x 16 x half> @vfcopysign_vv_nxv16f16(<vscale x 16 x half> %vm, <v
;
; ZVFHMIN-LABEL: vfcopysign_vv_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v12, v12, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v12
; ZVFHMIN-NEXT: ret
%r = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> %vm, <vscale x 16 x half> %vs)
ret <vscale x 16 x half> %r
@@ -1001,12 +904,11 @@ define <vscale x 16 x half> @vfcopysign_vf_nxv16f16(<vscale x 16 x half> %vm, ha
; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v12, v12, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v12
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 16 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -1023,18 +925,13 @@ define <vscale x 16 x half> @vfcopynsign_vv_nxv16f16(<vscale x 16 x half> %vm, <
;
; ZVFHMIN-LABEL: vfcopynsign_vv_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v12, v12, a0
+; ZVFHMIN-NEXT: vand.vx v12, v12, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v12
; ZVFHMIN-NEXT: ret
%n = fneg <vscale x 16 x half> %vs
%r = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> %vm, <vscale x 16 x half> %n)
@@ -1055,17 +952,12 @@ define <vscale x 16 x half> @vfcopynsign_vf_nxv16f16(<vscale x 16 x half> %vm, h
; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v12, v12, a0
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v12, v12, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v12
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 16 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -1085,19 +977,12 @@ define <vscale x 32 x half> @vfcopysign_vv_nxv32f16(<vscale x 32 x half> %vm, <v
;
; ZVFHMIN-LABEL: vfcopysign_vv_nxv32f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v24, v0, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v16, v16, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v16
; ZVFHMIN-NEXT: ret
%r = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> %vm, <vscale x 32 x half> %vs)
ret <vscale x 32 x half> %r
@@ -1117,17 +1002,13 @@ define <vscale x 32 x half> @vfcopysign_vf_nxv32f16(<vscale x 32 x half> %vm, ha
; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vmv.v.v v28, v24
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v16, v24, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v16
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 32 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
@@ -1144,29 +1025,13 @@ define <vscale x 32 x half> @vfcopynsign_vv_nxv32f16(<vscale x 32 x half> %vm, <
;
; ZVFHMIN-LABEL: vfcopynsign_vv_nxv32f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v24, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v24, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v24, v24, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v20
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v16, v16, a0
+; ZVFHMIN-NEXT: vand.vx v16, v16, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v16
; ZVFHMIN-NEXT: ret
%n = fneg <vscale x 32 x half> %vs
%r = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> %vm, <vscale x 32 x half> %n)
@@ -1187,22 +1052,14 @@ define <vscale x 32 x half> @vfcopynsign_vf_nxv32f16(<vscale x 32 x half> %vm, h
; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vmv.v.v v28, v24
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v16, v24, a0
+; ZVFHMIN-NEXT: addi a1, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vand.vx v16, v16, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v16
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 32 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
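As a reading aid for the ZVFHMIN sequences above: lui a0, 8 materializes the binary16 sign-bit mask 0x8000, and addi a1, a0, -1 produces the magnitude mask 0x7fff. The copysign checks keep the magnitude bits of the first operand and the sign bit of the second (vand.vx, vand.vx, vor.vv), while the copynsign variants flip the sign first with vxor.vx. A minimal scalar C++ sketch of the same bit manipulation; the helper names are illustrative and not part of the patch:

#include <cstdint>

// Masks matching the constants built by "lui a0, 8" and "addi a1, a0, -1".
constexpr uint16_t SignBit = 0x8000;
constexpr uint16_t MagMask = 0x7fff;

// Scalar analogue of the vand.vx/vand.vx/vor.vv copysign sequence.
uint16_t copysignF16Bits(uint16_t Mag, uint16_t Sgn) {
  return static_cast<uint16_t>((Mag & MagMask) | (Sgn & SignBit));
}

// Scalar analogue of the copynsign sequence, which adds a vxor.vx to flip
// the sign of the second operand before extracting its sign bit.
uint16_t copynsignF16Bits(uint16_t Mag, uint16_t Sgn) {
  uint16_t Flipped = static_cast<uint16_t>(Sgn ^ SignBit);
  return static_cast<uint16_t>((Mag & MagMask) | (Flipped & SignBit));
}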
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll
index c835dc72268b32..725ac14b0e7a7e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll
@@ -22,19 +22,16 @@ define <vscale x 1 x half> @vfmsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
;
; ZVFHMIN-LABEL: vfmsub_vv_nxv1f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v11
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmacc.vv v12, v10, v9
+; ZVFHMIN-NEXT: vfmadd.vv v9, v10, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 1 x half> %vc
%vd = call <vscale x 1 x half> @llvm.experimental.constrained.fma.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -55,18 +52,15 @@ define <vscale x 1 x half> @vfmsub_vf_nxv1f16(<vscale x 1 x half> %va, <vscale x
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v10, v9, v8
+; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 1 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -86,19 +80,16 @@ define <vscale x 2 x half> @vfmsub_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
;
; ZVFHMIN-LABEL: vfmsub_vv_nxv2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v11
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmacc.vv v12, v9, v10
+; ZVFHMIN-NEXT: vfmadd.vv v10, v9, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 2 x half> %vb
%vd = call <vscale x 2 x half> @llvm.experimental.constrained.fma.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vc, <vscale x 2 x half> %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -119,18 +110,15 @@ define <vscale x 2 x half> @vfmsub_vf_nxv2f16(<vscale x 2 x half> %va, <vscale x
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v10, v8, v9
+; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 2 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -150,19 +138,16 @@ define <vscale x 4 x half> @vfmsub_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
;
; ZVFHMIN-LABEL: vfmsub_vv_nxv4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmacc.vv v16, v10, v14
+; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 4 x half> %vc
%vd = call <vscale x 4 x half> @llvm.experimental.constrained.fma.nxv4f16(<vscale x 4 x half> %vb, <vscale x 4 x half> %va, <vscale x 4 x half> %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -183,16 +168,13 @@ define <vscale x 4 x half> @vfmsub_vf_nxv4f16(<vscale x 4 x half> %va, <vscale x
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v10, v14
+; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
@@ -214,19 +196,16 @@ define <vscale x 8 x half> @vfmsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
;
; ZVFHMIN-LABEL: vfmsub_vv_nxv8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v12
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmacc.vv v20, v16, v12
+; ZVFHMIN-NEXT: vfmadd.vv v12, v20, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 8 x half> %va
%vd = call <vscale x 8 x half> @llvm.experimental.constrained.fma.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x half> %vc, <vscale x 8 x half> %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -247,16 +226,13 @@ define <vscale x 8 x half> @vfmsub_vf_nxv8f16(<vscale x 8 x half> %va, <vscale x
; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v12, v20
+; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: ret
@@ -278,19 +254,30 @@ define <vscale x 16 x half> @vfmsub_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
;
; ZVFHMIN-LABEL: vfmsub_vv_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v24, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: sub sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v12, v12, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmacc.vv v16, v24, v0
+; ZVFHMIN-NEXT: vfmadd.vv v24, v0, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add sp, sp, a0
+; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 16 x half> %vb
%vd = call <vscale x 16 x half> @llvm.experimental.constrained.fma.nxv16f16(<vscale x 16 x half> %vc, <vscale x 16 x half> %va, <vscale x 16 x half> %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -312,16 +299,13 @@ define <vscale x 16 x half> @vfmsub_vf_nxv16f16(<vscale x 16 x half> %va, <vscal
; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v12, v12, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v0
+; ZVFHMIN-NEXT: vfmadd.vv v24, v0, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: ret
@@ -351,58 +335,60 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
; ZVFHMIN-NEXT: mul a1, a1, a2
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; ZVFHMIN-NEXT: vmv8r.v v24, v16
+; ZVFHMIN-NEXT: vmv8r.v v0, v16
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vl8re16.v v16, (a0)
+; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: vmv4r.v v20, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v0, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmacc.vv v0, v8, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
+; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
+; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8
+; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: li a1, 24
; ZVFHMIN-NEXT: mul a0, a0, a1
@@ -431,85 +417,78 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16(<vscale x 32 x half> %va, <vscal
; ZVFHMIN-NEXT: sub sp, sp, a0
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 28 * vlenb
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 12
+; ZVFHMIN-NEXT: li a1, 20
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vmv8r.v v24, v8
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmv.v.f v0, fa5
+; ZVFHMIN-NEXT: vfmv.v.f v24, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v24
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs4r.v v20, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: li a1, 12
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v0, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v0
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 20
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 2
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 20
+; ZVFHMIN-NEXT: li a1, 12
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmacc.vv v0, v24, v16
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfmadd.vv v24, v0, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 20
+; ZVFHMIN-NEXT: li a1, 12
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vl4r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 12
+; ZVFHMIN-NEXT: li a1, 20
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v20
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 20
+; ZVFHMIN-NEXT: li a1, 12
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24
+; ZVFHMIN-NEXT: vfmadd.vv v0, v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: li a1, 28
; ZVFHMIN-NEXT: mul a0, a0, a1
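In the vfmsub tests above, the fneg of the addend is folded into a single vxor.vx on the f16 operand before it is widened, so one vfmadd.vv replaces the previous widen/vfneg/narrow round trip. The scalar identity being exercised, shown here with illustrative double values rather than the f16 vectors the tests use, is a*b - c == fma(a, b, -c):

#include <cmath>
#include <cstdio>

int main() {
  double A = 1.5, B = 2.0, C = 0.25;
  std::printf("%g %g\n", A * B - C, std::fma(A, B, -C)); // both print 2.75
}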
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
index 21b895b812354a..2991e52d4266a6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
@@ -17,12 +17,9 @@ define <vscale x 1 x half> @vfneg_vv_nxv1f16(<vscale x 1 x half> %va) {
;
; ZVFHMIN-LABEL: vfneg_vv_nxv1f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%vb = fneg <vscale x 1 x half> %va
ret <vscale x 1 x half> %vb
@@ -37,12 +34,9 @@ define <vscale x 2 x half> @vfneg_vv_nxv2f16(<vscale x 2 x half> %va) {
;
; ZVFHMIN-LABEL: vfneg_vv_nxv2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%vb = fneg <vscale x 2 x half> %va
ret <vscale x 2 x half> %vb
@@ -57,12 +51,9 @@ define <vscale x 4 x half> @vfneg_vv_nxv4f16(<vscale x 4 x half> %va) {
;
; ZVFHMIN-LABEL: vfneg_vv_nxv4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%vb = fneg <vscale x 4 x half> %va
ret <vscale x 4 x half> %vb
@@ -77,12 +68,9 @@ define <vscale x 8 x half> @vfneg_vv_nxv8f16(<vscale x 8 x half> %va) {
;
; ZVFHMIN-LABEL: vfneg_vv_nxv8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%vb = fneg <vscale x 8 x half> %va
ret <vscale x 8 x half> %vb
@@ -97,12 +85,9 @@ define <vscale x 16 x half> @vfneg_vv_nxv16f16(<vscale x 16 x half> %va) {
;
; ZVFHMIN-LABEL: vfneg_vv_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%vb = fneg <vscale x 16 x half> %va
ret <vscale x 16 x half> %vb
@@ -117,17 +102,9 @@ define <vscale x 32 x half> @vfneg_vv_nxv32f16(<vscale x 32 x half> %va) {
;
; ZVFHMIN-LABEL: vfneg_vv_nxv32f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
%vb = fneg <vscale x 32 x half> %va
ret <vscale x 32 x half> %vb
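The vfneg checks above now reduce to a single vxor.vx with the 0x8000 mask, and the nxv32f16 case handles the whole register group in one e16, m8 operation. A scalar sketch of the bit flip; the helper name is illustrative:

#include <cstdint>
#include <cstdio>

// Flip only the binary16 sign bit, the scalar analogue of
// "vxor.vx v8, v8, a0" with a0 = 0x8000.
uint16_t fnegF16Bits(uint16_t X) { return static_cast<uint16_t>(X ^ 0x8000u); }

int main() {
  std::printf("%#06x\n", fnegF16Bits(0x3c00)); // 0x3c00 is +1.0; prints 0xbc00
}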
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
index b54590cd9d8440..2f41b59d6b2253 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
@@ -22,24 +22,17 @@ define <vscale x 1 x half> @vfnmsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale
;
; ZVFHMIN-LABEL: vfnmsub_vv_nxv1f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v11
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v10, v8, v9
+; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 1 x half> %va
%neg2 = fneg <vscale x 1 x half> %vc
@@ -61,23 +54,16 @@ define <vscale x 1 x half> @vfnmsub_vf_nxv1f16(<vscale x 1 x half> %va, <vscale
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v11
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v11, v9, v8
+; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 1 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -98,24 +84,17 @@ define <vscale x 2 x half> @vfnmsub_vv_nxv2f16(<vscale x 2 x half> %va, <vscale
;
; ZVFHMIN-LABEL: vfnmsub_vv_nxv2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v11
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v11
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v10
+; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 2 x half> %va
%neg2 = fneg <vscale x 2 x half> %vb
@@ -137,23 +116,16 @@ define <vscale x 2 x half> @vfnmsub_vf_nxv2f16(<vscale x 2 x half> %va, <vscale
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v10, v9, v8
+; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 2 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -174,24 +146,17 @@ define <vscale x 4 x half> @vfnmsub_vv_nxv4f16(<vscale x 4 x half> %va, <vscale
;
; ZVFHMIN-LABEL: vfnmsub_vv_nxv4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v14
+; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 4 x half> %vb
%neg2 = fneg <vscale x 4 x half> %vc
@@ -213,23 +178,16 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16(<vscale x 4 x half> %va, <vscale
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v10, v12, a0
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v10, v14
+; ZVFHMIN-NEXT: vfmadd.vv v10, v14, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 4 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -250,24 +208,17 @@ define <vscale x 8 x half> @vfnmsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
;
; ZVFHMIN-LABEL: vfnmsub_vv_nxv8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12
+; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 8 x half> %vb
%neg2 = fneg <vscale x 8 x half> %va
@@ -289,23 +240,16 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16(<vscale x 8 x half> %va, <vscale
; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v12, v16, a0
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v12
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v12, v20
+; ZVFHMIN-NEXT: vfmadd.vv v12, v20, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -326,25 +270,17 @@ define <vscale x 16 x half> @vfnmsub_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
;
; ZVFHMIN-LABEL: vfnmsub_vv_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vmv4r.v v4, v8
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v16, v16, a0
+; ZVFHMIN-NEXT: vxor.vx v12, v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v24
+; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 16 x half> %vc
%neg2 = fneg <vscale x 16 x half> %vb
@@ -361,29 +297,21 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16(<vscale x 16 x half> %va, <vsca
;
; ZVFHMIN-LABEL: vfnmsub_vf_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vmv4r.v v28, v8
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v16, v24, a0
+; ZVFHMIN-NEXT: vxor.vx v12, v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
+; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 16 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -408,92 +336,79 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 5
; ZVFHMIN-NEXT: sub sp, sp, a1
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v24, v24, a0
+; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: slli a1, a1, 4
+; ZVFHMIN-NEXT: add a1, sp, a1
+; ZVFHMIN-NEXT: addi a1, a1, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vxor.vx v0, v16, a0
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v0, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v0, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
+; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v8
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28
+; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
+; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v24
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v28
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8
+; ZVFHMIN-NEXT: vfmadd.vv v24, v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 5
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
@@ -515,80 +430,95 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 20
+; ZVFHMIN-NEXT: li a1, 40
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: sub sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x14, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 20 * vlenb
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 12
-; ZVFHMIN-NEXT: mul a0, a0, a1
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.v.f v24, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v0, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v0
+; ZVFHMIN-NEXT: vfncvt.f.f.w v0, v24
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 5
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv.v.v v4, v0
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v24, v0, a0
+; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: slli a1, a1, 4
+; ZVFHMIN-NEXT: add a1, sp, a1
+; ZVFHMIN-NEXT: addi a1, a1, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vxor.vx v16, v8, a0
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 5
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v0, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 12
+; ZVFHMIN-NEXT: li a1, 24
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 12
+; ZVFHMIN-NEXT: li a1, 24
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8
+; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 20
+; ZVFHMIN-NEXT: li a1, 40
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll
index 2f7e693a8a6f98..dc23b7dfbf1ee4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll
@@ -22,19 +22,16 @@ define <vscale x 1 x half> @vfnmsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale
;
; ZVFHMIN-LABEL: vfnmsub_vv_nxv1f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v11
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v9, v10, v8
+; ZVFHMIN-NEXT: vfmadd.vv v10, v11, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 1 x half> %va
%vd = call <vscale x 1 x half> @llvm.experimental.constrained.fma.nxv1f16(<vscale x 1 x half> %neg, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -55,18 +52,15 @@ define <vscale x 1 x half> @vfnmsub_vf_nxv1f16(<vscale x 1 x half> %va, <vscale
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v11
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v11, v9, v8
+; ZVFHMIN-NEXT: vfmadd.vv v9, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 1 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -86,19 +80,16 @@ define <vscale x 2 x half> @vfnmsub_vv_nxv2f16(<vscale x 2 x half> %va, <vscale
;
; ZVFHMIN-LABEL: vfnmsub_vv_nxv2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v11
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v10, v9, v8
+; ZVFHMIN-NEXT: vfmadd.vv v9, v11, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 2 x half> %va
%vd = call <vscale x 2 x half> @llvm.experimental.constrained.fma.nxv2f16(<vscale x 2 x half> %neg, <vscale x 2 x half> %vc, <vscale x 2 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -119,18 +110,15 @@ define <vscale x 2 x half> @vfnmsub_vf_nxv2f16(<vscale x 2 x half> %va, <vscale
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v10, v9, v8
+; ZVFHMIN-NEXT: vfmadd.vv v9, v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 2 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -150,19 +138,16 @@ define <vscale x 4 x half> @vfnmsub_vv_nxv4f16(<vscale x 4 x half> %va, <vscale
;
; ZVFHMIN-LABEL: vfnmsub_vv_nxv4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v9, v9, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12
+; ZVFHMIN-NEXT: vfmadd.vv v10, v12, v14
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 4 x half> %vb
%vd = call <vscale x 4 x half> @llvm.experimental.constrained.fma.nxv4f16(<vscale x 4 x half> %neg, <vscale x 4 x half> %va, <vscale x 4 x half> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -183,18 +168,15 @@ define <vscale x 4 x half> @vfnmsub_vf_nxv4f16(<vscale x 4 x half> %va, <vscale
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v10
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v10, v12, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v10
+; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 4 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -214,19 +196,16 @@ define <vscale x 8 x half> @vfnmsub_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
;
; ZVFHMIN-LABEL: vfnmsub_vv_nxv8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v10, v10, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v12, v20, v16
+; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v20
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 8 x half> %vb
%vd = call <vscale x 8 x half> @llvm.experimental.constrained.fma.nxv8f16(<vscale x 8 x half> %neg, <vscale x 8 x half> %vc, <vscale x 8 x half> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -247,18 +226,15 @@ define <vscale x 8 x half> @vfnmsub_vf_nxv8f16(<vscale x 8 x half> %va, <vscale
; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v12
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v12, v16, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v12
+; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -278,34 +254,16 @@ define <vscale x 16 x half> @vfnmsub_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
;
; ZVFHMIN-LABEL: vfnmsub_vv_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: addi sp, sp, -16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: sub sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; ZVFHMIN-NEXT: vmv4r.v v4, v12
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v16, v16, a0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl4r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v8
+; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: add sp, sp, a0
-; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT: ret
%neg = fneg <vscale x 16 x half> %vc
%vd = call <vscale x 16 x half> @llvm.experimental.constrained.fma.nxv16f16(<vscale x 16 x half> %neg, <vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -321,38 +279,20 @@ define <vscale x 16 x half> @vfnmsub_vf_nxv16f16(<vscale x 16 x half> %va, <vsca
;
; ZVFHMIN-LABEL: vfnmsub_vf_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: addi sp, sp, -16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: sub sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; ZVFHMIN-NEXT: vmv4r.v v28, v12
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl4r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vxor.vx v16, v24, a0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v8
+; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: add sp, sp, a0
-; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 16 x half> poison, half %c, i32 0
%splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
@@ -376,77 +316,79 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 5
; ZVFHMIN-NEXT: sub sp, sp, a1
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
+; ZVFHMIN-NEXT: vmv8r.v v0, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v24, a0
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v0, v0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v24, v0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vmv4r.v v20, v12
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v24, v0, v8
; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v20
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8
+; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v0
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 5
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
@@ -467,73 +409,86 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 5
; ZVFHMIN-NEXT: sub sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.v.f v24, fa5
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v24, v24
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vfncvt.f.f.w v0, v24
+; ZVFHMIN-NEXT: vmv8r.v v24, v16
+; ZVFHMIN-NEXT: vmv8r.v v16, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vmv8r.v v8, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv.v.v v4, v0
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v0, v0, a0
; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
-; ZVFHMIN-NEXT: vmv.v.v v8, v4
+; ZVFHMIN-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v0
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv8r.v v8, v24
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v0, v16
+; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v0
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v20
+; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v20
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v8
+; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v0
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 5
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
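
For readers skimming the updated check lines above: the new ZVFHMIN sequences replace the widen/vfneg/narrow round trip with a single vxor.vx against a0, where `lui a0, 8` materializes 0x8000, the binary16 sign-bit mask. The scalar sketch below is plain C++ rather than LLVM code; the f16neg/f16abs/f16copysign helper names are illustrative, and the fabs/copysign variants are included by analogy only (the checks above only exercise the fneg xor pattern). It shows the bit-level semantics these integer ops rely on, and why they keep NaN payloads bit-exact where a convert round trip would not.

#include <cstdint>
#include <cstdio>

// IEEE-754 binary16 sign-bit mask; `lui a0, 8` materializes the same 0x8000.
constexpr uint16_t SignBit = 0x8000;

// fneg: flip the sign bit, leaving every other bit (including NaN payloads) intact.
uint16_t f16neg(uint16_t bits) { return static_cast<uint16_t>(bits ^ SignBit); }

// fabs: clear the sign bit (shown by analogy; not taken from the checks above).
uint16_t f16abs(uint16_t bits) { return static_cast<uint16_t>(bits & ~SignBit); }

// copysign: magnitude of `mag`, sign of `sgn` (shown by analogy).
uint16_t f16copysign(uint16_t mag, uint16_t sgn) {
  return static_cast<uint16_t>((mag & ~SignBit) | (sgn & SignBit));
}

int main() {
  // 1.0h -> -1.0h, |-1.0h| -> 1.0h, and a NaN payload (0x7e01) kept
  // bit-exact while the sign of the second operand is copied over.
  std::printf("%04x %04x %04x\n", f16neg(0x3c00), f16abs(0xbc00),
              f16copysign(0x7e01, 0x8000));
  return 0;
}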