[llvm] b2cea57 - [VE] FADD,FSUB,FMUL,FDIV v256f32|f64 isel and tests
Simon Moll via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 21 00:30:55 PST 2021
Author: Simon Moll
Date: 2021-12-21T09:15:31+01:00
New Revision: b2cea573c9a175688a68eec84dbb37864933f60f
URL: https://github.com/llvm/llvm-project/commit/b2cea573c9a175688a68eec84dbb37864933f60f
DIFF: https://github.com/llvm/llvm-project/commit/b2cea573c9a175688a68eec84dbb37864933f60f.diff
LOG: [VE] FADD,FSUB,FMUL,FDIV v256f32|f64 isel and tests
Depends on D115940 for the `Binary_rv_vr_vv` pattern class op isel
fragment used for divisions.
Reviewed By: kaz7
Differential Revision: https://reviews.llvm.org/D116035
Added:
llvm/test/CodeGen/VE/Vector/vp_fadd.ll
llvm/test/CodeGen/VE/Vector/vp_fdiv.ll
llvm/test/CodeGen/VE/Vector/vp_fmul.ll
llvm/test/CodeGen/VE/Vector/vp_fsub.ll
Modified:
llvm/lib/Target/VE/VVPInstrInfo.td
llvm/lib/Target/VE/VVPInstrPatternsVec.td
llvm/lib/Target/VE/VVPNodes.def
Removed:
################################################################################
diff --git a/llvm/lib/Target/VE/VVPInstrInfo.td b/llvm/lib/Target/VE/VVPInstrInfo.td
index a2d647e36e00d..99566e91ec11c 100644
--- a/llvm/lib/Target/VE/VVPInstrInfo.td
+++ b/llvm/lib/Target/VE/VVPInstrInfo.td
@@ -29,6 +29,16 @@ def SDTIntBinOpVVP : SDTypeProfile<1, 4, [ // vp_add, vp_and, etc.
IsVLVT<4>
]>;
+// BinaryFPOp(x,y,mask,vl)
+def SDTFPBinOpVVP : SDTypeProfile<1, 4, [ // vvp_fadd, etc.
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisFP<0>,
+ SDTCisInt<3>,
+ SDTCisSameNumEltsAs<0, 3>,
+ IsVLVT<4>
+]>;
+
// Binary operator commutative pattern.
class vvp_commutative<SDNode RootOp> :
PatFrags<
@@ -61,4 +71,11 @@ def vvp_srl : SDNode<"VEISD::VVP_SRL", SDTIntBinOpVVP>;
def vvp_sra : SDNode<"VEISD::VVP_SRA", SDTIntBinOpVVP>;
def vvp_shl : SDNode<"VEISD::VVP_SHL", SDTIntBinOpVVP>;
+def vvp_fadd : SDNode<"VEISD::VVP_FADD", SDTFPBinOpVVP>;
+def c_vvp_fadd : vvp_commutative<vvp_fadd>;
+def vvp_fsub : SDNode<"VEISD::VVP_FSUB", SDTFPBinOpVVP>;
+def vvp_fmul : SDNode<"VEISD::VVP_FMUL", SDTFPBinOpVVP>;
+def c_vvp_fmul : vvp_commutative<vvp_fmul>;
+def vvp_fdiv : SDNode<"VEISD::VVP_FDIV", SDTFPBinOpVVP>;
+
// } Binary Operators
diff --git a/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
index 3788e49c6c7d1..8d5d9d1035479 100644
--- a/llvm/lib/Target/VE/VVPInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
@@ -178,3 +178,16 @@ defm : Binary_vr_vv_ShortLong<vvp_sra,
defm : Binary_vr_vv_ShortLong<vvp_srl,
i64, v256i64, "VSRL",
i32, v256i32, "PVSRLLO">;
+
+defm : Binary_rv_vv_ShortLong<c_vvp_fadd,
+ f64, v256f64, "VFADDD",
+ f32, v256f32, "PVFADDUP">;
+defm : Binary_rv_vv_ShortLong<c_vvp_fmul,
+ f64, v256f64, "VFMULD",
+ f32, v256f32, "PVFMULUP">;
+defm : Binary_rv_vv_ShortLong<vvp_fsub,
+ f64, v256f64, "VFSUBD",
+ f32, v256f32, "PVFSUBUP">;
+defm : Binary_rv_vr_vv_ShortLong<vvp_fdiv,
+ f64, v256f64, "VFDIVD",
+ f32, v256f32, "VFDIVS">;
diff --git a/llvm/lib/Target/VE/VVPNodes.def b/llvm/lib/Target/VE/VVPNodes.def
index 97dcb84ae7d14..8a9231f7d3e67 100644
--- a/llvm/lib/Target/VE/VVPNodes.def
+++ b/llvm/lib/Target/VE/VVPNodes.def
@@ -53,6 +53,12 @@ ADD_BINARY_VVP_OP_COMPACT(AND)
ADD_BINARY_VVP_OP_COMPACT(OR)
ADD_BINARY_VVP_OP_COMPACT(XOR)
+// FP arithmetic.
+ADD_BINARY_VVP_OP_COMPACT(FADD)
+ADD_BINARY_VVP_OP_COMPACT(FSUB)
+ADD_BINARY_VVP_OP_COMPACT(FMUL)
+ADD_BINARY_VVP_OP_COMPACT(FDIV)
+
#undef ADD_BINARY_VVP_OP
#undef ADD_BINARY_VVP_OP_COMPACT
#undef ADD_VVP_OP
diff --git a/llvm/test/CodeGen/VE/Vector/vp_fadd.ll b/llvm/test/CodeGen/VE/Vector/vp_fadd.ll
new file mode 100644
index 0000000000000..804235f4ea264
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/vp_fadd.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+declare <256 x float> @llvm.vp.fadd.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32)
+
+define fastcc <256 x float> @test_vp_fadd_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fadd_v256f32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvfadd.up %v0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <256 x float> @llvm.vp.fadd.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_fadd_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fadd_v256f32_rv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: pvfadd.up %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <256 x float> undef, float %s0, i32 0
+ %i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.vp.fadd.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_fadd_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fadd_v256f32_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: pvfadd.up %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <256 x float> undef, float %s1, i32 0
+ %i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.vp.fadd.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+
+declare <256 x double> @llvm.vp.fadd.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32)
+
+define fastcc <256 x double> @test_vp_fadd_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fadd_v256f64_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vfadd.d %v0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vp_fadd_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fadd_v256f64_rv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfadd.d %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <256 x double> undef, double %s0, i32 0
+ %i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vp_fadd_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fadd_v256f64_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfadd.d %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <256 x double> undef, double %s1, i32 0
+ %i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
diff --git a/llvm/test/CodeGen/VE/Vector/vp_fdiv.ll b/llvm/test/CodeGen/VE/Vector/vp_fdiv.ll
new file mode 100644
index 0000000000000..669e1f9c9307a
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/vp_fdiv.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+declare <256 x float> @llvm.vp.fdiv.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32)
+
+define fastcc <256 x float> @test_vp_fdiv_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fdiv_v256f32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vfdiv.s %v0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <256 x float> @llvm.vp.fdiv.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_fdiv_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fdiv_v256f32_rv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfdiv.s %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <256 x float> undef, float %s0, i32 0
+ %i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.vp.fdiv.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_fdiv_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fdiv_v256f32_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfdiv.s %v0, %v0, %s0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <256 x float> undef, float %s1, i32 0
+ %i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.vp.fdiv.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+
+declare <256 x double> @llvm.vp.fdiv.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32)
+
+define fastcc <256 x double> @test_vp_fdiv_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fdiv_v256f64_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vfdiv.d %v0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <256 x double> @llvm.vp.fdiv.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vp_fdiv_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fdiv_v256f64_rv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfdiv.d %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <256 x double> undef, double %s0, i32 0
+ %i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.vp.fdiv.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vp_fdiv_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fdiv_v256f64_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfdiv.d %v0, %v0, %s0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <256 x double> undef, double %s1, i32 0
+ %i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.vp.fdiv.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
diff --git a/llvm/test/CodeGen/VE/Vector/vp_fmul.ll b/llvm/test/CodeGen/VE/Vector/vp_fmul.ll
new file mode 100644
index 0000000000000..0277e755f32ca
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/vp_fmul.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+declare <256 x float> @llvm.vp.fmul.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32)
+
+define fastcc <256 x float> @test_vp_fmul_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fmul_v256f32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvfmul.up %v0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <256 x float> @llvm.vp.fmul.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_fmul_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fmul_v256f32_rv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: pvfmul.up %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <256 x float> undef, float %s0, i32 0
+ %i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.vp.fmul.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_fmul_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fmul_v256f32_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: pvfmul.up %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <256 x float> undef, float %s1, i32 0
+ %i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.vp.fmul.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+
+declare <256 x double> @llvm.vp.fmul.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32)
+
+define fastcc <256 x double> @test_vp_fmul_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fmul_v256f64_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vfmul.d %v0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <256 x double> @llvm.vp.fmul.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vp_fmul_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fmul_v256f64_rv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfmul.d %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <256 x double> undef, double %s0, i32 0
+ %i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.vp.fmul.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vp_fmul_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fmul_v256f64_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfmul.d %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <256 x double> undef, double %s1, i32 0
+ %i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.vp.fmul.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
diff --git a/llvm/test/CodeGen/VE/Vector/vp_fsub.ll b/llvm/test/CodeGen/VE/Vector/vp_fsub.ll
new file mode 100644
index 0000000000000..8f51522ee681b
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/vp_fsub.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+declare <256 x float> @llvm.vp.fsub.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32)
+
+define fastcc <256 x float> @test_vp_fsub_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fsub_v256f32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvfsub.up %v0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <256 x float> @llvm.vp.fsub.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_fsub_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fsub_v256f32_rv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: pvfsub.up %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <256 x float> undef, float %s0, i32 0
+ %i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.vp.fsub.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_fsub_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fsub_v256f32_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea %s2, 256
+; CHECK-NEXT: lvl %s2
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: pvfsub.up %v0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <256 x float> undef, float %s1, i32 0
+ %i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x float> @llvm.vp.fsub.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x float> %r0
+}
+
+
+declare <256 x double> @llvm.vp.fsub.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32)
+
+define fastcc <256 x double> @test_vp_fsub_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fsub_v256f64_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vfsub.d %v0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %r0 = call <256 x double> @llvm.vp.fsub.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vp_fsub_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fsub_v256f64_rv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfsub.d %v0, %s0, %v0, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %xins = insertelement <256 x double> undef, double %s0, i32 0
+ %i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.vp.fsub.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vp_fsub_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_fsub_v256f64_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea %s2, 256
+; CHECK-NEXT: lvl %s2
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vfsub.d %v0, %v0, %v1, %vm1
+; CHECK-NEXT: b.l.t (, %s10)
+ %yins = insertelement <256 x double> undef, double %s1, i32 0
+ %i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer
+ %r0 = call <256 x double> @llvm.vp.fsub.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
+ ret <256 x double> %r0
+}
More information about the llvm-commits mailing list