[llvm] c25293c - [LegalizeVectorOps][RISCV] Don't promote VP_FABS/FNEG/FCOPYSIGN. (#106659)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 30 09:44:55 PDT 2024
Author: Craig Topper
Date: 2024-08-30T09:44:51-07:00
New Revision: c25293c6dd9a71b4655d1d6497ab8576c15e446e
URL: https://github.com/llvm/llvm-project/commit/c25293c6dd9a71b4655d1d6497ab8576c15e446e
DIFF: https://github.com/llvm/llvm-project/commit/c25293c6dd9a71b4655d1d6497ab8576c15e446e.diff
LOG: [LegalizeVectorOps][RISCV] Don't promote VP_FABS/FNEG/FCOPYSIGN. (#106659)
Promoting these operations canonicalizes NaNs, which changes their semantics. Bitcast to
integer and use logic ops instead.
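[Editor's note, not part of the patch: the expansions added below work on the integer bit pattern rather than on the floating-point value, so NaN payloads pass through untouched. A minimal standalone C++ sketch of the binary16 identities the expansion relies on (sign bit is bit 15); the constants and helper names here are illustrative, not taken from the patch:]

    #include <cstdint>
    #include <cstdio>

    // fneg flips the sign bit, fabs clears it, copysign splices it from the
    // sign operand into the magnitude operand. No FP arithmetic is performed,
    // so a NaN's payload bits are never canonicalized.
    uint16_t fneg_bits(uint16_t x) { return x ^ 0x8000; }
    uint16_t fabs_bits(uint16_t x) { return x & 0x7FFF; }
    uint16_t copysign_bits(uint16_t mag, uint16_t sgn) {
      return (mag & 0x7FFF) | (sgn & 0x8000);
    }

    int main() {
      uint16_t nan = 0xFD55; // negative NaN with a nonzero payload
      std::printf("fneg:     0x%04X\n", fneg_bits(nan));  // 0x7D55, payload intact
      std::printf("fabs:     0x%04X\n", fabs_bits(nan));  // 0x7D55
      std::printf("copysign: 0x%04X\n",
                  copysign_bits(0x3C00 /*1.0*/, nan));    // 0xBC00 == -1.0
      return 0;
    }

[This mirrors the vand.vx/vxor.vx/vor.vv sequences in the ZVFHMIN test diffs below, where lui a1, 8 materializes 0x8000 and addi a1, a1, -1 turns it into 0x7FFF.]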
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll
llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll
llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll
llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 2557fa288606e7..87221c14433ab5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -135,6 +135,9 @@ class VectorLegalizer {
SDValue ExpandVP_SELECT(SDNode *Node);
SDValue ExpandVP_MERGE(SDNode *Node);
SDValue ExpandVP_REM(SDNode *Node);
+ SDValue ExpandVP_FNEG(SDNode *Node);
+ SDValue ExpandVP_FABS(SDNode *Node);
+ SDValue ExpandVP_FCOPYSIGN(SDNode *Node);
SDValue ExpandSELECT(SDNode *Node);
std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
SDValue ExpandStore(SDNode *N);
@@ -699,6 +702,11 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
// These operations are used to do promotion so they can't be promoted
// themselves.
llvm_unreachable("Don't know how to promote this operation!");
+ case ISD::VP_FABS:
+ case ISD::VP_FCOPYSIGN:
+ case ISD::VP_FNEG:
+ // Promoting fabs, fneg, and fcopysign changes their semantics.
+ llvm_unreachable("These operations should not be promoted");
}
// There are currently two cases of vector promotion:
@@ -887,6 +895,24 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
+ case ISD::VP_FNEG:
+ if (SDValue Expanded = ExpandVP_FNEG(Node)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
+ case ISD::VP_FABS:
+ if (SDValue Expanded = ExpandVP_FABS(Node)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
+ case ISD::VP_FCOPYSIGN:
+ if (SDValue Expanded = ExpandVP_FCOPYSIGN(Node)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::SELECT:
Results.push_back(ExpandSELECT(Node));
return;
@@ -1557,6 +1583,80 @@ SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) {
return DAG.getNode(ISD::VP_SUB, DL, VT, Dividend, Mul, Mask, EVL);
}
+SDValue VectorLegalizer::ExpandVP_FNEG(SDNode *Node) {
+ EVT VT = Node->getValueType(0);
+ EVT IntVT = VT.changeVectorElementTypeToInteger();
+
+ if (!TLI.isOperationLegalOrCustom(ISD::VP_XOR, IntVT))
+ return SDValue();
+
+ SDValue Mask = Node->getOperand(1);
+ SDValue EVL = Node->getOperand(2);
+
+ SDLoc DL(Node);
+ SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
+ SDValue SignMask = DAG.getConstant(
+ APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
+ SDValue Xor = DAG.getNode(ISD::VP_XOR, DL, IntVT, Cast, SignMask, Mask, EVL);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Xor);
+}
+
+SDValue VectorLegalizer::ExpandVP_FABS(SDNode *Node) {
+ EVT VT = Node->getValueType(0);
+ EVT IntVT = VT.changeVectorElementTypeToInteger();
+
+ if (!TLI.isOperationLegalOrCustom(ISD::VP_AND, IntVT))
+ return SDValue();
+
+ SDValue Mask = Node->getOperand(1);
+ SDValue EVL = Node->getOperand(2);
+
+ SDLoc DL(Node);
+ SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
+ SDValue ClearSignMask = DAG.getConstant(
+ APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
+ SDValue ClearSign =
+ DAG.getNode(ISD::VP_AND, DL, IntVT, Cast, ClearSignMask, Mask, EVL);
+ return DAG.getNode(ISD::BITCAST, DL, VT, ClearSign);
+}
+
+SDValue VectorLegalizer::ExpandVP_FCOPYSIGN(SDNode *Node) {
+ EVT VT = Node->getValueType(0);
+
+ if (VT != Node->getOperand(1).getValueType())
+ return SDValue();
+
+ EVT IntVT = VT.changeVectorElementTypeToInteger();
+ if (!TLI.isOperationLegalOrCustom(ISD::VP_AND, IntVT) ||
+ !TLI.isOperationLegalOrCustom(ISD::VP_XOR, IntVT))
+ return SDValue();
+
+ SDValue Mask = Node->getOperand(2);
+ SDValue EVL = Node->getOperand(3);
+
+ SDLoc DL(Node);
+ SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
+ SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));
+
+ SDValue SignMask = DAG.getConstant(
+ APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
+ SDValue SignBit =
+ DAG.getNode(ISD::VP_AND, DL, IntVT, Sign, SignMask, Mask, EVL);
+
+ SDValue ClearSignMask = DAG.getConstant(
+ APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
+ SDValue ClearedSign =
+ DAG.getNode(ISD::VP_AND, DL, IntVT, Mag, ClearSignMask, Mask, EVL);
+
+ SDNodeFlags Flags;
+ Flags.setDisjoint(true);
+
+ SDValue CopiedSign = DAG.getNode(ISD::VP_OR, DL, IntVT, ClearedSign, SignBit,
+ Mask, EVL, Flags);
+
+ return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
+}
+
void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
// Attempt to expand using TargetLowering.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 33bc4b063bbb48..e990325ac38279 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -891,16 +891,30 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};
// TODO: support more vp ops.
- static const unsigned ZvfhminPromoteVPOps[] = {
- ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
- ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
- ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
- ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
- ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
- ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
- ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
- ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
- ISD::VP_FMAXIMUM, ISD::VP_REDUCE_FMINIMUM, ISD::VP_REDUCE_FMAXIMUM};
+ static const unsigned ZvfhminPromoteVPOps[] = {ISD::VP_FADD,
+ ISD::VP_FSUB,
+ ISD::VP_FMUL,
+ ISD::VP_FDIV,
+ ISD::VP_FMA,
+ ISD::VP_REDUCE_FADD,
+ ISD::VP_REDUCE_SEQ_FADD,
+ ISD::VP_REDUCE_FMIN,
+ ISD::VP_REDUCE_FMAX,
+ ISD::VP_SQRT,
+ ISD::VP_FMINNUM,
+ ISD::VP_FMAXNUM,
+ ISD::VP_FCEIL,
+ ISD::VP_FFLOOR,
+ ISD::VP_FROUND,
+ ISD::VP_FROUNDEVEN,
+ ISD::VP_FROUNDTOZERO,
+ ISD::VP_FRINT,
+ ISD::VP_FNEARBYINT,
+ ISD::VP_SETCC,
+ ISD::VP_FMINIMUM,
+ ISD::VP_FMAXIMUM,
+ ISD::VP_REDUCE_FMINIMUM,
+ ISD::VP_REDUCE_FMAXIMUM};
// Sets common operation actions on RVV floating-point vector types.
const auto SetCommonVFPActions = [&](MVT VT) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll
index ae3dce497c6d07..90a856605c70d8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll
@@ -19,12 +19,10 @@ define <2 x half> @vfabs_vv_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl)
;
; ZVFHMIN-LABEL: vfabs_vv_v2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v9, v9, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
%v = call <2 x half> @llvm.vp.fabs.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
@@ -39,12 +37,10 @@ define <2 x half> @vfabs_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
;
; ZVFHMIN-LABEL: vfabs_vv_v2f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v9, v9
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
%v = call <2 x half> @llvm.vp.fabs.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
ret <2 x half> %v
@@ -61,12 +57,10 @@ define <4 x half> @vfabs_vv_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl)
;
; ZVFHMIN-LABEL: vfabs_vv_v4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v9, v9, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
%v = call <4 x half> @llvm.vp.fabs.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
ret <4 x half> %v
@@ -81,12 +75,10 @@ define <4 x half> @vfabs_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
;
; ZVFHMIN-LABEL: vfabs_vv_v4f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v9, v9
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
%v = call <4 x half> @llvm.vp.fabs.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x half> %v
@@ -103,12 +95,10 @@ define <8 x half> @vfabs_vv_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
;
; ZVFHMIN-LABEL: vfabs_vv_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v10, v10, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
%v = call <8 x half> @llvm.vp.fabs.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
ret <8 x half> %v
@@ -123,12 +113,10 @@ define <8 x half> @vfabs_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
;
; ZVFHMIN-LABEL: vfabs_vv_v8f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v10, v10
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
%v = call <8 x half> @llvm.vp.fabs.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl)
ret <8 x half> %v
@@ -145,12 +133,10 @@ define <16 x half> @vfabs_vv_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %
;
; ZVFHMIN-LABEL: vfabs_vv_v16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v12, v12, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
%v = call <16 x half> @llvm.vp.fabs.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
@@ -165,12 +151,10 @@ define <16 x half> @vfabs_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl)
;
; ZVFHMIN-LABEL: vfabs_vv_v16f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v12, v12
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
%v = call <16 x half> @llvm.vp.fabs.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl)
ret <16 x half> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll
index fbc4c56a911340..019923ffdfdedf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll
@@ -19,12 +19,9 @@ define <2 x half> @vfneg_vv_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl)
;
; ZVFHMIN-LABEL: vfneg_vv_v2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v9, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
%v = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
@@ -39,12 +36,9 @@ define <2 x half> @vfneg_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
;
; ZVFHMIN-LABEL: vfneg_vv_v2f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v9
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
%v = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
ret <2 x half> %v
@@ -61,12 +55,9 @@ define <4 x half> @vfneg_vv_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl)
;
; ZVFHMIN-LABEL: vfneg_vv_v4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v9, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
%v = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
ret <4 x half> %v
@@ -81,12 +72,9 @@ define <4 x half> @vfneg_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
;
; ZVFHMIN-LABEL: vfneg_vv_v4f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v9
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
%v = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x half> %v
@@ -103,12 +91,9 @@ define <8 x half> @vfneg_vv_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
;
; ZVFHMIN-LABEL: vfneg_vv_v8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v10, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
%v = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
ret <8 x half> %v
@@ -123,12 +108,9 @@ define <8 x half> @vfneg_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
;
; ZVFHMIN-LABEL: vfneg_vv_v8f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v10
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
%v = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl)
ret <8 x half> %v
@@ -145,12 +127,9 @@ define <16 x half> @vfneg_vv_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %
;
; ZVFHMIN-LABEL: vfneg_vv_v16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
%v = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
@@ -165,12 +144,9 @@ define <16 x half> @vfneg_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl)
;
; ZVFHMIN-LABEL: vfneg_vv_v16f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
%v = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl)
ret <16 x half> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll
index 6e34d59a2d9894..e8a7d790758596 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll
@@ -19,13 +19,12 @@ define <vscale x 1 x half> @vfsgnj_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
;
; ZVFHMIN-LABEL: vfsgnj_vv_nxv1f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v10, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t
; ZVFHMIN-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.copysign.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x half> %v
@@ -40,13 +39,12 @@ define <vscale x 1 x half> @vfsgnj_vv_nxv1f16_unmasked(<vscale x 1 x half> %va,
;
; ZVFHMIN-LABEL: vfsgnj_vv_nxv1f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v10
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a1
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.copysign.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
ret <vscale x 1 x half> %v
@@ -63,13 +61,12 @@ define <vscale x 2 x half> @vfsgnj_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
;
; ZVFHMIN-LABEL: vfsgnj_vv_nxv2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v10, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t
; ZVFHMIN-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.copysign.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x half> %v
@@ -84,13 +81,12 @@ define <vscale x 2 x half> @vfsgnj_vv_nxv2f16_unmasked(<vscale x 2 x half> %va,
;
; ZVFHMIN-LABEL: vfsgnj_vv_nxv2f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v10
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a1
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.copysign.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
ret <vscale x 2 x half> %v
@@ -107,13 +103,12 @@ define <vscale x 4 x half> @vfsgnj_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
;
; ZVFHMIN-LABEL: vfsgnj_vv_nxv4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t
; ZVFHMIN-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.copysign.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x half> %v
@@ -128,13 +123,12 @@ define <vscale x 4 x half> @vfsgnj_vv_nxv4f16_unmasked(<vscale x 4 x half> %va,
;
; ZVFHMIN-LABEL: vfsgnj_vv_nxv4f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a1
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.copysign.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret <vscale x 4 x half> %v
@@ -151,13 +145,12 @@ define <vscale x 8 x half> @vfsgnj_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
;
; ZVFHMIN-LABEL: vfsgnj_vv_nxv8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v10, v10, a1, v0.t
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10, v0.t
; ZVFHMIN-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.copysign.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
@@ -172,13 +165,12 @@ define <vscale x 8 x half> @vfsgnj_vv_nxv8f16_unmasked(<vscale x 8 x half> %va,
;
; ZVFHMIN-LABEL: vfsgnj_vv_nxv8f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v10, v10, a1
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.copysign.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret <vscale x 8 x half> %v
@@ -195,13 +187,12 @@ define <vscale x 16 x half> @vfsgnj_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
;
; ZVFHMIN-LABEL: vfsgnj_vv_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v12, v12, a1, v0.t
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: vor.vv v8, v8, v12, v0.t
; ZVFHMIN-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.copysign.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x half> %v
@@ -216,13 +207,12 @@ define <vscale x 16 x half> @vfsgnj_vv_nxv16f16_unmasked(<vscale x 16 x half> %v
;
; ZVFHMIN-LABEL: vfsgnj_vv_nxv16f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v12, v12, a1
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vor.vv v8, v8, v12
; ZVFHMIN-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.copysign.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
ret <vscale x 16 x half> %v
@@ -239,48 +229,12 @@ define <vscale x 32 x half> @vfsgnj_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
;
; ZVFHMIN-LABEL: vfsgnj_vv_nxv32f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: addi sp, sp, -16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 3
-; ZVFHMIN-NEXT: sub sp, sp, a1
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; ZVFHMIN-NEXT: vmv1r.v v7, v0
-; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: slli a1, a2, 1
-; ZVFHMIN-NEXT: sub a3, a0, a1
-; ZVFHMIN-NEXT: sltu a4, a0, a3
-; ZVFHMIN-NEXT: addi a4, a4, -1
-; ZVFHMIN-NEXT: and a3, a4, a3
-; ZVFHMIN-NEXT: srli a2, a2, 2
-; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2
-; ZVFHMIN-NEXT: addi a2, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v24, v0.t
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2
-; ZVFHMIN-NEXT: # %bb.1:
-; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB10_2:
-; ZVFHMIN-NEXT: addi a1, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vmv1r.v v0, v7
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: add sp, sp, a0
-; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v16, v16, a1, v0.t
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: vor.vv v8, v8, v16, v0.t
; ZVFHMIN-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.copysign.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x half> %v
@@ -295,48 +249,12 @@ define <vscale x 32 x half> @vfsgnj_vv_nxv32f16_unmasked(<vscale x 32 x half> %v
;
; ZVFHMIN-LABEL: vfsgnj_vv_nxv32f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: addi sp, sp, -16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 3
-; ZVFHMIN-NEXT: sub sp, sp, a1
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: slli a1, a2, 1
-; ZVFHMIN-NEXT: sub a3, a0, a1
-; ZVFHMIN-NEXT: sltu a4, a0, a3
-; ZVFHMIN-NEXT: addi a4, a4, -1
-; ZVFHMIN-NEXT: and a3, a4, a3
-; ZVFHMIN-NEXT: srli a2, a2, 2
-; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma
-; ZVFHMIN-NEXT: vmset.m v24
-; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2
-; ZVFHMIN-NEXT: addi a2, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v24, v0.t
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2
-; ZVFHMIN-NEXT: # %bb.1:
-; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB11_2:
-; ZVFHMIN-NEXT: addi a1, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: add sp, sp, a0
-; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v16, v16, a1
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vor.vv v8, v8, v16
; ZVFHMIN-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.copysign.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl)
ret <vscale x 32 x half> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll
index 0f7e3f1e0ea5a2..b9be6eb1fa3737 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll
@@ -19,12 +19,10 @@ define <vscale x 1 x half> @vfabs_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
;
; ZVFHMIN-LABEL: vfabs_vv_nxv1f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v9, v9, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.fabs.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x half> %v
@@ -39,12 +37,10 @@ define <vscale x 1 x half> @vfabs_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, i
;
; ZVFHMIN-LABEL: vfabs_vv_nxv1f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v9, v9
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.fabs.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
ret <vscale x 1 x half> %v
@@ -61,12 +57,10 @@ define <vscale x 2 x half> @vfabs_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
;
; ZVFHMIN-LABEL: vfabs_vv_nxv2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v9, v9, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.fabs.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x half> %v
@@ -81,12 +75,10 @@ define <vscale x 2 x half> @vfabs_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, i
;
; ZVFHMIN-LABEL: vfabs_vv_nxv2f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v9, v9
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.fabs.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
ret <vscale x 2 x half> %v
@@ -103,12 +95,10 @@ define <vscale x 4 x half> @vfabs_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
;
; ZVFHMIN-LABEL: vfabs_vv_nxv4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v10, v10, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.fabs.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x half> %v
@@ -123,12 +113,10 @@ define <vscale x 4 x half> @vfabs_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, i
;
; ZVFHMIN-LABEL: vfabs_vv_nxv4f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v10, v10
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.fabs.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret <vscale x 4 x half> %v
@@ -145,12 +133,10 @@ define <vscale x 8 x half> @vfabs_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
;
; ZVFHMIN-LABEL: vfabs_vv_nxv8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v12, v12, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.fabs.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
@@ -165,12 +151,10 @@ define <vscale x 8 x half> @vfabs_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, i
;
; ZVFHMIN-LABEL: vfabs_vv_nxv8f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v12, v12
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.fabs.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret <vscale x 8 x half> %v
@@ -187,12 +171,10 @@ define <vscale x 16 x half> @vfabs_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
;
; ZVFHMIN-LABEL: vfabs_vv_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v16, v16, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.fabs.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x half> %v
@@ -207,12 +189,10 @@ define <vscale x 16 x half> @vfabs_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
;
; ZVFHMIN-LABEL: vfabs_vv_nxv16f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v16, v16
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.fabs.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
ret <vscale x 16 x half> %v
@@ -229,32 +209,10 @@ define <vscale x 32 x half> @vfabs_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
;
; ZVFHMIN-LABEL: vfabs_vv_nxv32f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vmv1r.v v16, v0
-; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: slli a1, a2, 1
-; ZVFHMIN-NEXT: sub a3, a0, a1
-; ZVFHMIN-NEXT: sltu a4, a0, a3
-; ZVFHMIN-NEXT: addi a4, a4, -1
-; ZVFHMIN-NEXT: and a3, a4, a3
-; ZVFHMIN-NEXT: srli a2, a2, 2
-; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v24, v24, v0.t
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2
-; ZVFHMIN-NEXT: # %bb.1:
-; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB10_2:
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vmv1r.v v0, v16
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.fabs.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x half> %v
@@ -269,32 +227,10 @@ define <vscale x 32 x half> @vfabs_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
;
; ZVFHMIN-LABEL: vfabs_vv_nxv32f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: slli a1, a2, 1
-; ZVFHMIN-NEXT: sub a3, a0, a1
-; ZVFHMIN-NEXT: sltu a4, a0, a3
-; ZVFHMIN-NEXT: addi a4, a4, -1
-; ZVFHMIN-NEXT: and a3, a4, a3
-; ZVFHMIN-NEXT: srli a2, a2, 2
-; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma
-; ZVFHMIN-NEXT: vmset.m v16
-; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v16, v16, v0.t
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2
-; ZVFHMIN-NEXT: # %bb.1:
-; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB11_2:
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v16, v16
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.fabs.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
ret <vscale x 32 x half> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll
index 69ea7ce33cf6b6..af2668a9b0c545 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll
@@ -19,12 +19,9 @@ define <vscale x 1 x half> @vfneg_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
;
; ZVFHMIN-LABEL: vfneg_vv_nxv1f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v9, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x half> %v
@@ -39,12 +36,9 @@ define <vscale x 1 x half> @vfneg_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, i
;
; ZVFHMIN-LABEL: vfneg_vv_nxv1f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v9
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
ret <vscale x 1 x half> %v
@@ -61,12 +55,9 @@ define <vscale x 2 x half> @vfneg_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
;
; ZVFHMIN-LABEL: vfneg_vv_nxv2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v9, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x half> %v
@@ -81,12 +72,9 @@ define <vscale x 2 x half> @vfneg_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, i
;
; ZVFHMIN-LABEL: vfneg_vv_nxv2f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v9, v9
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
ret <vscale x 2 x half> %v
@@ -103,12 +91,9 @@ define <vscale x 4 x half> @vfneg_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
;
; ZVFHMIN-LABEL: vfneg_vv_nxv4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v10, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x half> %v
@@ -123,12 +108,9 @@ define <vscale x 4 x half> @vfneg_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, i
;
; ZVFHMIN-LABEL: vfneg_vv_nxv4f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v10, v10
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret <vscale x 4 x half> %v
@@ -145,12 +127,9 @@ define <vscale x 8 x half> @vfneg_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
;
; ZVFHMIN-LABEL: vfneg_vv_nxv8f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
@@ -165,12 +144,9 @@ define <vscale x 8 x half> @vfneg_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, i
;
; ZVFHMIN-LABEL: vfneg_vv_nxv8f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v12, v12
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret <vscale x 8 x half> %v
@@ -187,12 +163,9 @@ define <vscale x 16 x half> @vfneg_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
;
; ZVFHMIN-LABEL: vfneg_vv_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x half> %v
@@ -207,12 +180,9 @@ define <vscale x 16 x half> @vfneg_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
;
; ZVFHMIN-LABEL: vfneg_vv_nxv16f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
ret <vscale x 16 x half> %v
@@ -229,32 +199,9 @@ define <vscale x 32 x half> @vfneg_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
;
; ZVFHMIN-LABEL: vfneg_vv_nxv32f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vmv1r.v v16, v0
-; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: slli a1, a2, 1
-; ZVFHMIN-NEXT: sub a3, a0, a1
-; ZVFHMIN-NEXT: sltu a4, a0, a3
-; ZVFHMIN-NEXT: addi a4, a4, -1
-; ZVFHMIN-NEXT: and a3, a4, a3
-; ZVFHMIN-NEXT: srli a2, a2, 2
-; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v24, v24, v0.t
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2
-; ZVFHMIN-NEXT: # %bb.1:
-; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB10_2:
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vmv1r.v v0, v16
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v24, v0.t
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x half> %v
@@ -269,32 +216,9 @@ define <vscale x 32 x half> @vfneg_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
;
; ZVFHMIN-LABEL: vfneg_vv_nxv32f16_unmasked:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: slli a1, a2, 1
-; ZVFHMIN-NEXT: sub a3, a0, a1
-; ZVFHMIN-NEXT: sltu a4, a0, a3
-; ZVFHMIN-NEXT: addi a4, a4, -1
-; ZVFHMIN-NEXT: and a3, a4, a3
-; ZVFHMIN-NEXT: srli a2, a2, 2
-; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma
-; ZVFHMIN-NEXT: vmset.m v16
-; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16, v0.t
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2
-; ZVFHMIN-NEXT: # %bb.1:
-; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB11_2:
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v16, v16
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
ret <vscale x 32 x half> %v