[llvm] [RISCV] Support Strict FP arithmetic Op when only have Zvfhmin (PR #68867)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 12 02:42:42 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Jianjian Guan (jacquesguan)
<details>
<summary>Changes</summary>
This patch adds support for the following strict FP opcodes: STRICT_FADD, STRICT_FSUB, STRICT_FMUL, STRICT_FDIV, STRICT_FSQRT, and STRICT_FMA.
---
Patch is 192.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/68867.diff
11 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp (+42)
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+46-6)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll (+233-62)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll (+253-67)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll (+362-65)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll (+403-65)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmul-constrained-sdnode.ll (+233-62)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll (+461-65)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll (+441-65)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfsqrt-constrained-sdnode.ll (+101-32)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfsub-constrained-sdnode.ll (+253-67)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index ddc3b94e9c29a58..7e8faaecedfe146 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -179,6 +179,8 @@ class VectorLegalizer {
/// type.
void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
public:
VectorLegalizer(SelectionDAG& dag) :
DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
@@ -634,6 +636,38 @@ void VectorLegalizer::PromoteSETCC(SDNode *Node,
Results.push_back(Res);
}
+void VectorLegalizer::PromoteSTRICT(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ MVT VecVT = Node->getOperand(1).getSimpleValueType();
+ MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
+
+ assert(VecVT.isFloatingPoint());
+
+ SDLoc DL(Node);
+ SmallVector<SDValue, 5> Operands(Node->getNumOperands());
+
+ for (unsigned j = 0; j != Node->getNumOperands(); ++j)
+ if (Node->getOperand(j).getValueType().isVector() &&
+ !(ISD::isVPOpcode(Node->getOpcode()) &&
+ ISD::getVPMaskIdx(Node->getOpcode()) == j)) // Skip mask operand.
+ // promote the vector operand.
+ Operands[j] =
+ DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j));
+ else
+ Operands[j] = Node->getOperand(j); // Skip VL operand.
+
+ SDVTList VTs = DAG.getVTList(NewVecVT, Node->getValueType(1));
+
+ SDValue Res =
+ DAG.getNode(Node->getOpcode(), DL, VTs, Operands, Node->getFlags());
+
+ SDValue Res0 = DAG.getNode(ISD::FP_ROUND, DL, VecVT, Res.getValue(0),
+ DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
+
+ Results.push_back(Res0);
+ Results.push_back(Res.getValue(1));
+}
+
void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
// For a few operations there is a specific concept for promotion based on
// the operand's type.
@@ -674,6 +708,14 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
// Promote the operation by extending the operand.
PromoteSETCC(Node, Results);
return;
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ PromoteSTRICT(Node, Results);
+ return;
case ISD::FP_ROUND:
case ISD::FP_EXTEND:
// These operations are used to do promotion so they can't be promoted
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5cf5ee496656da3..9559849d74cbfd6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -827,12 +827,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// TODO: support more ops.
static const unsigned ZvfhminPromoteOps[] = {
- ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
- ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
- ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
- ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
- ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM,
- ISD::FMINIMUM};
+ ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
+ ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
+ ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
+ ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
+ ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM,
+ ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
+ ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};
// TODO: support more vp ops.
static const unsigned ZvfhminPromoteVPOps[] = {
@@ -5452,6 +5453,41 @@ static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
{ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
}
+static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
+
+ assert(Op->isStrictFPOpcode());
+
+ auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
+
+ SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
+ SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
+
+ SDLoc DL(Op);
+
+ SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
+ SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
+
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (!Op.getOperand(j).getValueType().isVector()) {
+ LoOperands[j] = Op.getOperand(j);
+ HiOperands[j] = Op.getOperand(j);
+ continue;
+ }
+ std::tie(LoOperands[j], HiOperands[j]) =
+ DAG.SplitVector(Op.getOperand(j), DL);
+ }
+
+ SDValue LoRes =
+ DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
+ HiOperands[0] = LoRes.getValue(1);
+ SDValue HiRes =
+ DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
+
+ SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
+ LoRes.getValue(0), HiRes.getValue(0));
+ return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
+}
+
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -6223,6 +6259,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::STRICT_FDIV:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:
+ if (Op.getValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitStrictFPVectorOp(Op, DAG);
return lowerToScalableOp(Op, DAG);
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll
index 04ed41cd0952d1c..10cfa5a5cfb9099 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll
@@ -1,27 +1,57 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
declare <vscale x 1 x half> @llvm.experimental.constrained.fadd.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, metadata, metadata)
define <vscale x 1 x half> @vfadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) strictfp {
-; CHECK-LABEL: vfadd_vv_nxv1f16:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfadd_vv_nxv1f16:
+; ZVFH: # %bb.0: # %entry
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfadd.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfadd_vv_nxv1f16:
+; ZVFHMIN: # %bb.0: # %entry
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
entry:
%vc = call <vscale x 1 x half> @llvm.experimental.constrained.fadd.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
ret <vscale x 1 x half> %vc
}
define <vscale x 1 x half> @vfadd_vf_nxv1f16(<vscale x 1 x half> %va, half %b) strictfp {
-; CHECK-LABEL: vfadd_vf_nxv1f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfadd.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfadd_vf_nxv1f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfadd.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfadd_vf_nxv1f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v9, v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 1 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
%vc = call <vscale x 1 x half> @llvm.experimental.constrained.fadd.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -30,22 +60,48 @@ define <vscale x 1 x half> @vfadd_vf_nxv1f16(<vscale x 1 x half> %va, half %b) s
declare <vscale x 2 x half> @llvm.experimental.constrained.fadd.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, metadata, metadata)
define <vscale x 2 x half> @vfadd_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb) strictfp {
-; CHECK-LABEL: vfadd_vv_nxv2f16:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfadd_vv_nxv2f16:
+; ZVFH: # %bb.0: # %entry
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfadd.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfadd_vv_nxv2f16:
+; ZVFHMIN: # %bb.0: # %entry
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
entry:
%vc = call <vscale x 2 x half> @llvm.experimental.constrained.fadd.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
ret <vscale x 2 x half> %vc
}
define <vscale x 2 x half> @vfadd_vf_nxv2f16(<vscale x 2 x half> %va, half %b) strictfp {
-; CHECK-LABEL: vfadd_vf_nxv2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfadd.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfadd_vf_nxv2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfadd.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfadd_vf_nxv2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v9, v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 2 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
%vc = call <vscale x 2 x half> @llvm.experimental.constrained.fadd.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -54,22 +110,48 @@ define <vscale x 2 x half> @vfadd_vf_nxv2f16(<vscale x 2 x half> %va, half %b) s
declare <vscale x 4 x half> @llvm.experimental.constrained.fadd.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, metadata, metadata)
define <vscale x 4 x half> @vfadd_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb) strictfp {
-; CHECK-LABEL: vfadd_vv_nxv4f16:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfadd_vv_nxv4f16:
+; ZVFH: # %bb.0: # %entry
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfadd.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfadd_vv_nxv4f16:
+; ZVFHMIN: # %bb.0: # %entry
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v10, v12, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
entry:
%vc = call <vscale x 4 x half> @llvm.experimental.constrained.fadd.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
ret <vscale x 4 x half> %vc
}
define <vscale x 4 x half> @vfadd_vf_nxv4f16(<vscale x 4 x half> %va, half %b) strictfp {
-; CHECK-LABEL: vfadd_vf_nxv4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfadd.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfadd_vf_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfadd.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfadd_vf_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v10, v10, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
%vc = call <vscale x 4 x half> @llvm.experimental.constrained.fadd.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -78,22 +160,48 @@ define <vscale x 4 x half> @vfadd_vf_nxv4f16(<vscale x 4 x half> %va, half %b) s
declare <vscale x 8 x half> @llvm.experimental.constrained.fadd.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, metadata, metadata)
define <vscale x 8 x half> @vfadd_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb) strictfp {
-; CHECK-LABEL: vfadd_vv_nxv8f16:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v8, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfadd_vv_nxv8f16:
+; ZVFH: # %bb.0: # %entry
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfadd.vv v8, v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfadd_vv_nxv8f16:
+; ZVFHMIN: # %bb.0: # %entry
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v12, v16, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
entry:
%vc = call <vscale x 8 x half> @llvm.experimental.constrained.fadd.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
ret <vscale x 8 x half> %vc
}
define <vscale x 8 x half> @vfadd_vf_nxv8f16(<vscale x 8 x half> %va, half %b) strictfp {
-; CHECK-LABEL: vfadd_vf_nxv8f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfadd.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfadd_vf_nxv8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfadd.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfadd_vf_nxv8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v12, v12, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
%vc = call <vscale x 8 x half> @llvm.experimental.constrained.fadd.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -102,22 +210,48 @@ define <vscale x 8 x half> @vfadd_vf_nxv8f16(<vscale x 8 x half> %va, half %b) s
declare <vscale x 16 x half> @llvm.experimental.constrained.fadd.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>, metadata, metadata)
define <vscale x 16 x half> @vfadd_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb) strictfp {
-; CHECK-LABEL: vfadd_vv_nxv16f16:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v8, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfadd_vv_nxv16f16:
+; ZVFH: # %bb.0: # %entry
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfadd.vv v8, v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfadd_vv_nxv16f16:
+; ZVFHMIN: # %bb.0: # %entry
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v16, v24, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
entry:
%vc = call <vscale x 16 x half> @llvm.experimental.constrained.fadd.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
ret <vscale x 16 x half> %vc
}
define <vscale x 16 x half> @vfadd_vf_nxv16f16(<vscale x 16 x half> %va, half %b) strictfp {
-; CHECK-LABEL: vfadd_vf_nxv16f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfadd.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfadd_vf_nxv16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfadd.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfadd_vf_nxv16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/68867
More information about the llvm-commits
mailing list