[llvm] 3297571 - [VE] v256f32|64 fma isel
Simon Moll via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 14 08:00:26 PDT 2022
Author: Simon Moll
Date: 2022-03-14T15:59:13+01:00
New Revision: 3297571e325a73bad615c57ec117a12f190bd6bc
URL: https://github.com/llvm/llvm-project/commit/3297571e325a73bad615c57ec117a12f190bd6bc
DIFF: https://github.com/llvm/llvm-project/commit/3297571e325a73bad615c57ec117a12f190bd6bc.diff
LOG: [VE] v256f32|64 fma isel
Instruction selection for llvm.fma, llvm.fmuladd and vp.fma on v256f32/v256f64 (and packed v512f32), with tests.
Reviewed By: kaz7
Differential Revision: https://reviews.llvm.org/D121477
Added:
llvm/test/CodeGen/VE/Vector/vec_fma.ll
llvm/test/CodeGen/VE/Vector/vp_fma.ll
llvm/test/CodeGen/VE/Vector/vp_fma_merge.ll
Modified:
llvm/lib/Target/VE/VVPISelLowering.cpp
llvm/lib/Target/VE/VVPInstrInfo.td
llvm/lib/Target/VE/VVPInstrPatternsVec.td
llvm/lib/Target/VE/VVPNodes.def
Removed:
################################################################################
diff --git a/llvm/lib/Target/VE/VVPISelLowering.cpp b/llvm/lib/Target/VE/VVPISelLowering.cpp
index ab6d2aef6f0a1..881efe3568d22 100644
--- a/llvm/lib/Target/VE/VVPISelLowering.cpp
+++ b/llvm/lib/Target/VE/VVPISelLowering.cpp
@@ -92,20 +92,31 @@ SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
VectorV, Mask, AVL, Op->getFlags());
}
- if (VVPOpcode == VEISD::VVP_SELECT) {
+ switch (VVPOpcode) {
+ default:
+ llvm_unreachable("lowerToVVP called for unexpected SDNode.");
+ case VEISD::VVP_FFMA: {
+ // VE has a swizzled operand order in FMA (compared to LLVM IR and
+ // SDNodes).
+ auto X = Op->getOperand(2);
+ auto Y = Op->getOperand(0);
+ auto Z = Op->getOperand(1);
+ return CDAG.getNode(VVPOpcode, LegalVecVT, {X, Y, Z, Mask, AVL});
+ }
+ case VEISD::VVP_SELECT: {
auto Mask = Op->getOperand(0);
auto OnTrue = Op->getOperand(1);
auto OnFalse = Op->getOperand(2);
return CDAG.getNode(VVPOpcode, LegalVecVT, {OnTrue, OnFalse, Mask, AVL});
}
- if (VVPOpcode == VEISD::VVP_SETCC) {
+ case VEISD::VVP_SETCC: {
EVT LegalResVT = getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
auto LHS = Op->getOperand(0);
auto RHS = Op->getOperand(1);
auto Pred = Op->getOperand(2);
return CDAG.getNode(VVPOpcode, LegalResVT, {LHS, RHS, Pred, Mask, AVL});
}
- llvm_unreachable("lowerToVVP called for unexpected SDNode.");
+ }
}
SDValue VETargetLowering::lowerVVP_LOAD_STORE(SDValue Op,
diff --git a/llvm/lib/Target/VE/VVPInstrInfo.td b/llvm/lib/Target/VE/VVPInstrInfo.td
index 8257033e42d16..594aa613a411f 100644
--- a/llvm/lib/Target/VE/VVPInstrInfo.td
+++ b/llvm/lib/Target/VE/VVPInstrInfo.td
@@ -72,6 +72,17 @@ def SDTFPBinOpVVP : SDTypeProfile<1, 4, [ // vvp_fadd, etc.
IsVLVT<4>
]>;
+// TernaryFPOp(x,y,z,mask,vl)
+def SDTFPTernaryOpVVP : SDTypeProfile<1, 5, [
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisFP<0>,
+ SDTCisInt<4>,
+ SDTCisSameNumEltsAs<0, 4>,
+ IsVLVT<5>
+]>;
+
// Select(OnTrue, OnFalse, SelMask, vl)
def SDTSelectVVP : SDTypeProfile<1, 4, [ // vp_select, vp_merge
SDTCisVec<0>,
@@ -110,6 +121,12 @@ class vvp_commutative<SDNode RootOp> :
[(RootOp node:$lhs, node:$rhs, node:$mask, node:$vlen),
(RootOp node:$rhs, node:$lhs, node:$mask, node:$vlen)]>;
+class vvp_fma_commutative<SDNode RootOp> :
+ PatFrags<
+ (ops node:$X, node:$Y, node:$Z, node:$mask, node:$vlen),
+ [(RootOp node:$X, node:$Y, node:$Z, node:$mask, node:$vlen),
+ (RootOp node:$X, node:$Z, node:$Y, node:$mask, node:$vlen)]>;
+
// VVP node definitions.
def vvp_add : SDNode<"VEISD::VVP_ADD", SDTIntBinOpVVP>;
def c_vvp_add : vvp_commutative<vvp_add>;
@@ -142,6 +159,9 @@ def vvp_fmul : SDNode<"VEISD::VVP_FMUL", SDTFPBinOpVVP>;
def c_vvp_fmul : vvp_commutative<vvp_fmul>;
def vvp_fdiv : SDNode<"VEISD::VVP_FDIV", SDTFPBinOpVVP>;
+def vvp_ffma : SDNode<"VEISD::VVP_FFMA", SDTFPTernaryOpVVP>;
+def c_vvp_ffma : vvp_fma_commutative<vvp_ffma>;
+
def vvp_scatter : SDNode<"VEISD::VVP_SCATTER", SDTScatterVVP,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def vvp_gather : SDNode<"VEISD::VVP_GATHER", SDTGatherVVP,
diff --git a/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
index f25fe6561d5cd..0efbb4d3a1a6e 100644
--- a/llvm/lib/Target/VE/VVPInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
@@ -376,6 +376,118 @@ defm : Binary_rv_vv<c_vvp_fmul,
defm : Binary_rv_vv<vvp_fsub,
i64, v512f32, v512i1, "PVFSUB">;
+multiclass Ternary_vvv<
+ SDPatternOperator OpNode, ValueType DataVT,
+ ValueType MaskVT, string OpBaseName> {
+ // Masked with passthru.
+ def : Pat<(vvp_select
+ (OpNode DataVT:$vx, DataVT:$vy, DataVT:$vz,
+ (MaskVT srcvalue), (i32 srcvalue)),
+ DataVT:$vfalse,
+ MaskVT:$mask,
+ i32:$avl),
+ (!cast<Instruction>(OpBaseName#"vvvml_v")
+ $vx, $vy, $vz, $mask, $avl, $vfalse)>;
+
+ // Unmasked.
+ def : Pat<(OpNode DataVT:$vx, DataVT:$vy, DataVT:$vz,
+ (MaskVT true_mask), i32:$avl),
+ (!cast<Instruction>(OpBaseName#"vvvl")
+ $vx, $vy, $vz, $avl)>;
+
+ // Masked.
+ def : Pat<(OpNode DataVT:$vx, DataVT:$vy, DataVT:$vz,
+ MaskVT:$mask, i32:$avl),
+ (!cast<Instruction>(OpBaseName#"vvvml")
+ $vx, $vy, $vz, $mask, $avl)>;
+}
+
+multiclass Ternary_rvv<
+ SDPatternOperator OpNode,
+ ValueType ScalarVT, ValueType DataVT,
+ ValueType MaskVT, string OpBaseName> {
+ // Masked with passthru, broadcast first.
+ def : Pat<(vvp_select
+ (OpNode
+ (any_broadcast ScalarVT:$sx), DataVT:$vy, DataVT:$vz,
+ (MaskVT srcvalue), (i32 srcvalue)),
+ DataVT:$vfalse,
+ MaskVT:$mask,
+ i32:$avl),
+ (!cast<Instruction>(OpBaseName#"rvvml_v")
+ $sx, $vy, $vz, $mask, $avl, $vfalse)>;
+
+ // Unmasked, broadcast first.
+ def : Pat<(OpNode
+ (any_broadcast ScalarVT:$sx), DataVT:$vy, DataVT:$vz,
+ (MaskVT true_mask), i32:$avl),
+ (!cast<Instruction>(OpBaseName#"rvvl")
+ $sx, $vy, $vz, $avl)>;
+
+ // Masked, broadcast first.
+ def : Pat<(OpNode
+ (any_broadcast ScalarVT:$sx), DataVT:$vy, DataVT:$vz,
+ MaskVT:$mask, i32:$avl),
+ (!cast<Instruction>(OpBaseName#"rvvml")
+ $sx, $vy, $vz, $mask, $avl)>;
+}
+
+multiclass Ternary_vrv<
+ SDPatternOperator OpNode,
+ ValueType ScalarVT, ValueType DataVT,
+ ValueType MaskVT, string OpBaseName> {
+ // Masked with passthru, broadcast second.
+ def : Pat<(vvp_select
+ (OpNode
+ DataVT:$vx, (any_broadcast ScalarVT:$sy), DataVT:$vz,
+ (MaskVT srcvalue), (i32 srcvalue)),
+ DataVT:$vfalse,
+ MaskVT:$mask,
+ i32:$avl),
+ (!cast<Instruction>(OpBaseName#"vrvml_v")
+ $vx, $sy, $vz,
+ $mask, $avl, $vfalse)>;
+
+ // Unmasked, broadcast second.
+ def : Pat<(OpNode
+ DataVT:$vx, (any_broadcast ScalarVT:$sy), DataVT:$vz,
+ (MaskVT true_mask), i32:$avl),
+ (!cast<Instruction>(OpBaseName#"vrvl")
+ $vx, $sy, $vz, $avl)>;
+
+ // Masked, broadcast second.
+ def : Pat<(OpNode
+ DataVT:$vx, (any_broadcast ScalarVT:$sy), DataVT:$vz,
+ MaskVT:$mask, i32:$avl),
+ (!cast<Instruction>(OpBaseName#"vrvml")
+ $vx, $sy, $vz, $mask, $avl)>;
+}
+
+multiclass Ternary_rvv_vrv_vvv<
+ SDPatternOperator OpNode,
+ ValueType ScalarVT, ValueType DataVT,
+ ValueType MaskVT, string OpBaseName> {
+ defm : Ternary_rvv<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
+ defm : Ternary_vrv<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
+ defm : Ternary_vvv<OpNode, DataVT, MaskVT, OpBaseName>;
+}
+
+// Expand both 64bit and 32 bit variant (256 elements)
+multiclass Ternary_ShortLong<
+ SDPatternOperator OpNode,
+ ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName,
+ ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> {
+ defm : Ternary_rvv_vrv_vvv<OpNode, LongScalarVT, LongDataVT,
+ v256i1, LongOpBaseName>;
+ defm : Ternary_rvv_vrv_vvv<OpNode, ShortScalarVT, ShortDataVT,
+ v256i1, ShortOpBaseName>;
+}
+
+defm : Ternary_ShortLong<c_vvp_ffma,
+ f64, v256f64, "VFMADD", f32, v256f32, "VFMADS">;
+defm : Ternary_rvv_vrv_vvv<c_vvp_ffma,
+ i64, v512f32, v512i1, "PVFMAD">;
+
multiclass Merge_mvv<
SDPatternOperator OpNode,
ValueType DataVT, ValueType MaskVT,
diff --git a/llvm/lib/Target/VE/VVPNodes.def b/llvm/lib/Target/VE/VVPNodes.def
index e042410a0e52e..2d8c694eea5f9 100644
--- a/llvm/lib/Target/VE/VVPNodes.def
+++ b/llvm/lib/Target/VE/VVPNodes.def
@@ -33,6 +33,14 @@
HANDLE_VP_TO_VVP(VPNAME, VVPNAME)
#endif
+/// ADD_TERNARY_VVP_OP(VVPNAME,SDNAME)
+/// \p VVPName is a VVP Ternary operator.
+/// \p SDNAME is the generic SD opcode corresponding to \p VVPName.
+#ifndef ADD_TERNARY_VVP_OP
+#define ADD_TERNARY_VVP_OP(VVPNAME,SDNAME) \
+ ADD_VVP_OP(VVPNAME,SDNAME)
+#endif
+
#ifndef ADD_BINARY_VVP_OP_COMPACT
#define ADD_BINARY_VVP_OP_COMPACT(NAME) \
ADD_BINARY_VVP_OP(VVP_##NAME,VP_##NAME,NAME)
@@ -97,6 +105,8 @@ ADD_BINARY_VVP_OP_COMPACT(FSUB) REGISTER_PACKED(VVP_FSUB)
ADD_BINARY_VVP_OP_COMPACT(FMUL) REGISTER_PACKED(VVP_FMUL)
ADD_BINARY_VVP_OP_COMPACT(FDIV)
+ADD_TERNARY_VVP_OP(VVP_FFMA,FMA) HANDLE_VP_TO_VVP(VP_FMA, VVP_FFMA) REGISTER_PACKED(VVP_FFMA)
+
ADD_VVP_OP(VVP_SETCC, SETCC)
// Shuffles.
@@ -106,6 +116,7 @@ HANDLE_VP_TO_VVP(VP_MERGE, VVP_SELECT)
#undef ADD_BINARY_VVP_OP
+#undef ADD_TERNARY_VVP_OP
#undef ADD_BINARY_VVP_OP_COMPACT
#undef ADD_REDUCE_VVP_OP
#undef ADD_VVP_OP
diff --git a/llvm/test/CodeGen/VE/Vector/vec_fma.ll b/llvm/test/CodeGen/VE/Vector/vec_fma.ll
new file mode 100644
index 0000000000000..e0a92f165d99f
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/vec_fma.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+declare <256 x float> @llvm.fma.v256f32(<256 x float>, <256 x float>, <256 x float>)
+
+define fastcc <256 x float> @test_vec_fma_v256f32_vvv(<256 x float> %i0, <256 x float> %i1, <256 x float> %i2) {
+; CHECK-LABEL: test_vec_fma_v256f32_vvv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 256
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vfmad.s %v0, %v2, %v0, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <256 x float> @llvm.fma.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x float> %i2)
+ ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vec_fma_v256f32_rvv(float %s0, <256 x float> %i1, <256 x float> %i2) {
+; CHECK-LABEL: test_vec_fma_v256f32_rvv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfmad.s %v0, %v1, %s0, %v0
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <256 x float> undef, float %s0, i32 0
+ %i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.fma.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x float> %i2)
+ ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vec_fma_v256f32_vrv(<256 x float> %i0, float %s1, <256 x float> %i2) {
+; CHECK-LABEL: test_vec_fma_v256f32_vrv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfmad.s %v0, %v1, %s0, %v0
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <256 x float> undef, float %s1, i32 0
+ %i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.fma.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x float> %i2)
+ ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vec_fma_v256f32_vvr(<256 x float> %i0, <256 x float> %i1, float %s2) {
+; CHECK-LABEL: test_vec_fma_v256f32_vvr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfmad.s %v0, %s0, %v0, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+ %zins = insertelement <256 x float> undef, float %s2, i32 0
+ %i2 = shufflevector <256 x float> %zins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.fma.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x float> %i2)
+ ret <256 x float> %r0
+}
+
+declare <256 x double> @llvm.fma.v256f64(<256 x double>, <256 x double>, <256 x double>)
+
+define fastcc <256 x double> @test_vec_fma_v256f64_vvv(<256 x double> %i0, <256 x double> %i1, <256 x double> %i2) {
+; CHECK-LABEL: test_vec_fma_v256f64_vvv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 256
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vfmad.d %v0, %v2, %v0, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <256 x double> @llvm.fma.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x double> %i2)
+ ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vec_fma_v256f64_rvv(double %s0, <256 x double> %i1, <256 x double> %i2) {
+; CHECK-LABEL: test_vec_fma_v256f64_rvv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfmad.d %v0, %v1, %s0, %v0
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <256 x double> undef, double %s0, i32 0
+ %i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.fma.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x double> %i2)
+ ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vec_fma_v256f64_vrv(<256 x double> %i0, double %s1, <256 x double> %i2) {
+; CHECK-LABEL: test_vec_fma_v256f64_vrv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfmad.d %v0, %v1, %s0, %v0
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <256 x double> undef, double %s1, i32 0
+ %i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.fma.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x double> %i2)
+ ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vec_fma_v256f64_vvr(<256 x double> %i0, <256 x double> %i1, double %s2) {
+; CHECK-LABEL: test_vec_fma_v256f64_vvr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfmad.d %v0, %s0, %v0, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+ %zins = insertelement <256 x double> undef, double %s2, i32 0
+ %i2 = shufflevector <256 x double> %zins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.fma.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x double> %i2)
+ ret <256 x double> %r0
+}
diff --git a/llvm/test/CodeGen/VE/Vector/vp_fma.ll b/llvm/test/CodeGen/VE/Vector/vp_fma.ll
new file mode 100644
index 0000000000000..6934184427638
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/vp_fma.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+declare <256 x float> @llvm.vp.fma.v256f32(<256 x float>, <256 x float>, <256 x float>, <256 x i1>, i32)
+
+define fastcc <256 x float> @test_vp_fma_v256f32_vvv(<256 x float> %i0, <256 x float> %i1, <256 x float> %i2, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fma_v256f32_vvv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vfmad.s %v0, %v2, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <256 x float> @llvm.vp.fma.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x float> %i2, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_fma_v256f32_rvv(float %s0, <256 x float> %i1, <256 x float> %i2, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fma_v256f32_rvv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfmad.s %v0, %v1, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <256 x float> undef, float %s0, i32 0
+ %i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.vp.fma.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x float> %i2, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_fma_v256f32_vrv(<256 x float> %i0, float %s1, <256 x float> %i2, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fma_v256f32_vrv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfmad.s %v0, %v1, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <256 x float> undef, float %s1, i32 0
+ %i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.vp.fma.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x float> %i2, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_fma_v256f32_vvr(<256 x float> %i0, <256 x float> %i1, float %s2, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fma_v256f32_vvr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfmad.s %v0, %s0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %zins = insertelement <256 x float> undef, float %s2, i32 0
+ %i2 = shufflevector <256 x float> %zins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.vp.fma.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x float> %i2, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+declare <256 x double> @llvm.vp.fma.v256f64(<256 x double>, <256 x double>, <256 x double>, <256 x i1>, i32)
+
+define fastcc <256 x double> @test_vp_fma_v256f64_vvv(<256 x double> %i0, <256 x double> %i1, <256 x double> %i2, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fma_v256f64_vvv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vfmad.d %v0, %v2, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <256 x double> @llvm.vp.fma.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x double> %i2, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vp_fma_v256f64_rvv(double %s0, <256 x double> %i1, <256 x double> %i2, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fma_v256f64_rvv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfmad.d %v0, %v1, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <256 x double> undef, double %s0, i32 0
+ %i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.vp.fma.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x double> %i2, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vp_fma_v256f64_vrv(<256 x double> %i0, double %s1, <256 x double> %i2, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fma_v256f64_vrv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfmad.d %v0, %v1, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <256 x double> undef, double %s1, i32 0
+ %i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.vp.fma.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x double> %i2, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vp_fma_v256f64_vvr(<256 x double> %i0, <256 x double> %i1, double %s2, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fma_v256f64_vvr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfmad.d %v0, %s0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %zins = insertelement <256 x double> undef, double %s2, i32 0
+ %i2 = shufflevector <256 x double> %zins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.vp.fma.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x double> %i2, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
diff --git a/llvm/test/CodeGen/VE/Vector/vp_fma_merge.ll b/llvm/test/CodeGen/VE/Vector/vp_fma_merge.ll
new file mode 100644
index 0000000000000..da2851538c659
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/vp_fma_merge.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+declare <256 x float> @llvm.vp.merge.v256f32(<256 x i1>, <256 x float>, <256 x float>, i32)
+declare <256 x float> @llvm.vp.fma.v256f32(<256 x float>, <256 x float>, <256 x float>, <256 x i1>, i32)
+
+define fastcc <256 x float> @test_vp_fma_v256f32_vvv_merge(<256 x float> %i0, <256 x float> %i1, <256 x float> %i2, <256 x i1> %m, i32 %n, <256 x float> %passthru) {
+; CHECK-LABEL: test_vp_fma_v256f32_vvv_merge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vfmad.s %v3, %v2, %v0, %v1, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v3
+; CHECK-NEXT: b.l.t (, %s10)
+ %vr = call <256 x float> @llvm.vp.fma.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x float> %i2, <256 x i1> %m, i32 %n)
+ %r0 = call <256 x float> @llvm.vp.merge.v256f32(<256 x i1> %m, <256 x float> %vr, <256 x float> %passthru, i32 %n)
+ ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_fma_v256f32_rvv_merge(float %s0, <256 x float> %i1, <256 x float> %i2, <256 x i1> %m, i32 %n, <256 x float> %passthru) {
+; CHECK-LABEL: test_vp_fma_v256f32_rvv_merge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfmad.s %v2, %v1, %s0, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v2
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <256 x float> undef, float %s0, i32 0
+ %i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
+ %vr = call <256 x float> @llvm.vp.fma.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x float> %i2, <256 x i1> %m, i32 %n)
+ %r0 = call <256 x float> @llvm.vp.merge.v256f32(<256 x i1> %m, <256 x float> %vr, <256 x float> %passthru, i32 %n)
+ ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_fma_v256f32_vrv_merge(<256 x float> %i0, float %s1, <256 x float> %i2, <256 x i1> %m, i32 %n, <256 x float> %passthru) {
+; CHECK-LABEL: test_vp_fma_v256f32_vrv_merge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfmad.s %v2, %v1, %s0, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v2
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <256 x float> undef, float %s1, i32 0
+ %i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
+ %vr = call <256 x float> @llvm.vp.fma.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x float> %i2, <256 x i1> %m, i32 %n)
+ %r0 = call <256 x float> @llvm.vp.merge.v256f32(<256 x i1> %m, <256 x float> %vr, <256 x float> %passthru, i32 %n)
+ ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_fma_v256f32_vvr_merge(<256 x float> %i0, <256 x float> %i1, float %s2, <256 x i1> %m, i32 %n, <256 x float> %passthru) {
+; CHECK-LABEL: test_vp_fma_v256f32_vvr_merge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfmad.s %v2, %s0, %v0, %v1, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v2
+; CHECK-NEXT: b.l.t (, %s10)
+ %zins = insertelement <256 x float> undef, float %s2, i32 0
+ %i2 = shufflevector <256 x float> %zins, <256 x float> undef, <256 x i32> zeroinitializer
+ %vr = call <256 x float> @llvm.vp.fma.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x float> %i2, <256 x i1> %m, i32 %n)
+ %r0 = call <256 x float> @llvm.vp.merge.v256f32(<256 x i1> %m, <256 x float> %vr, <256 x float> %passthru, i32 %n)
+ ret <256 x float> %r0
+}
More information about the llvm-commits
mailing list