[llvm] 76b0f99 - [PowerPC] Implement Vector Multiply High/Divide Extended Builtins in LLVM/Clang
Amy Kwan via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 26 21:14:52 PDT 2020
Author: Amy Kwan
Date: 2020-08-26T23:14:34-05:00
New Revision: 76b0f99ea854185c9866b0ab0f006137ba28e09e
URL: https://github.com/llvm/llvm-project/commit/76b0f99ea854185c9866b0ab0f006137ba28e09e
DIFF: https://github.com/llvm/llvm-project/commit/76b0f99ea854185c9866b0ab0f006137ba28e09e.diff
LOG: [PowerPC] Implement Vector Multiply High/Divide Extended Builtins in LLVM/Clang
This patch implements the function prototypes vec_mulh and vec_dive in order to
utilize the vector multiply high (vmulh[s|u][w|d]) and vector divide extended
(vdive[s|u][w|d]) instructions introduced in Power10.
Differential Revision: https://reviews.llvm.org/D82609
Added:
Modified:
clang/include/clang/Basic/BuiltinsPPC.def
clang/lib/Headers/altivec.h
clang/test/CodeGen/builtins-ppc-p10vector.c
llvm/include/llvm/IR/IntrinsicsPowerPC.td
llvm/lib/Target/PowerPC/PPCInstrPrefix.td
llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 566420d5dce9..9a33ba06d82e 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -305,6 +305,18 @@ BUILTIN(__builtin_altivec_vextractwm, "UiV4Ui", "")
BUILTIN(__builtin_altivec_vextractdm, "UiV2ULLi", "")
BUILTIN(__builtin_altivec_vextractqm, "UiV1ULLLi", "")
+// P10 Vector Divide Extended built-ins (vdive[s|u][w|d]); called by the vec_dive overloads in altivec.h.
+BUILTIN(__builtin_altivec_vdivesw, "V4SiV4SiV4Si", "")
+BUILTIN(__builtin_altivec_vdiveuw, "V4UiV4UiV4Ui", "")
+BUILTIN(__builtin_altivec_vdivesd, "V2LLiV2LLiV2LLi", "")
+BUILTIN(__builtin_altivec_vdiveud, "V2ULLiV2ULLiV2ULLi", "")
+
+// P10 Vector Multiply High built-ins (vmulh[s|u][w|d]); called by the vec_mulh overloads in altivec.h.
+BUILTIN(__builtin_altivec_vmulhsw, "V4SiV4SiV4Si", "")
+BUILTIN(__builtin_altivec_vmulhuw, "V4UiV4UiV4Ui", "")
+BUILTIN(__builtin_altivec_vmulhsd, "V2LLiV2LLiV2LLi", "")
+BUILTIN(__builtin_altivec_vmulhud, "V2ULLiV2ULLiV2ULLi", "")
+
// P10 Vector Parallel Bits built-ins.
BUILTIN(__builtin_altivec_vpdepd, "V2ULLiV2ULLiV2ULLi", "")
BUILTIN(__builtin_altivec_vpextd, "V2ULLiV2ULLiV2ULLi", "")
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index b1e70f6c41bb..6583b0f22a16 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -3288,6 +3288,30 @@ static __inline__ vector double __ATTRS_o_ai vec_div(vector double __a,
}
#endif
+/* vec_dive: vector divide extended. Each overload forwards (__a, __b) unchanged to the vdive[s|u][w|d] builtin matching its element type; only declared when __POWER10_VECTOR__ is defined. */
+
+#ifdef __POWER10_VECTOR__
+static __inline__ vector signed int __ATTRS_o_ai
+vec_dive(vector signed int __a, vector signed int __b) {
+ return __builtin_altivec_vdivesw(__a, __b);
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_dive(vector unsigned int __a, vector unsigned int __b) {
+ return __builtin_altivec_vdiveuw(__a, __b);
+}
+
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_dive(vector signed long long __a, vector signed long long __b) {
+ return __builtin_altivec_vdivesd(__a, __b);
+}
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_dive(vector unsigned long long __a, vector unsigned long long __b) {
+ return __builtin_altivec_vdiveud(__a, __b);
+}
+#endif /* __POWER10_VECTOR__ */
+
/* vec_dss */
#define vec_dss __builtin_altivec_dss
@@ -5737,6 +5761,30 @@ vec_vmuleuh(vector unsigned short __a, vector unsigned short __b) {
#endif
}
+/* vec_mulh: vector multiply high. Each overload forwards (__a, __b) unchanged to the vmulh[s|u][w|d] builtin matching its element type; only declared when __POWER10_VECTOR__ is defined. */
+
+#ifdef __POWER10_VECTOR__
+static __inline__ vector signed int __ATTRS_o_ai
+vec_mulh(vector signed int __a, vector signed int __b) {
+ return __builtin_altivec_vmulhsw(__a, __b);
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_mulh(vector unsigned int __a, vector unsigned int __b) {
+ return __builtin_altivec_vmulhuw(__a, __b);
+}
+
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_mulh(vector signed long long __a, vector signed long long __b) {
+ return __builtin_altivec_vmulhsd(__a, __b);
+}
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_mulh(vector unsigned long long __a, vector unsigned long long __b) {
+ return __builtin_altivec_vmulhud(__a, __b);
+}
+#endif /* __POWER10_VECTOR__ */
+
/* vec_mulo */
static __inline__ vector short __ATTRS_o_ai vec_mulo(vector signed char __a,
diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
index fe3e678a5794..16e468b62318 100644
--- a/clang/test/CodeGen/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -61,6 +61,54 @@ vector unsigned long long test_vec_div_ull(void) {
return vec_div(vulla, vullb);
}
+vector signed int test_vec_dive_si(void) {
+ // CHECK: @llvm.ppc.altivec.vdivesw(<4 x i32> %{{.+}}, <4 x i32> %{{.+}})
+ // CHECK-NEXT: ret <4 x i32>
+ return vec_dive(vsia, vsib);
+}
+
+vector unsigned int test_vec_dive_ui(void) {
+ // CHECK: @llvm.ppc.altivec.vdiveuw(<4 x i32> %{{.+}}, <4 x i32> %{{.+}})
+ // CHECK-NEXT: ret <4 x i32>
+ return vec_dive(vuia, vuib);
+}
+
+vector signed long long test_vec_dive_sll(void) {
+ // CHECK: @llvm.ppc.altivec.vdivesd(<2 x i64> %{{.+}}, <2 x i64> %{{.+}})
+ // CHECK-NEXT: ret <2 x i64>
+ return vec_dive(vslla, vsllb);
+}
+
+vector unsigned long long test_vec_dive_ull(void) {
+ // CHECK: @llvm.ppc.altivec.vdiveud(<2 x i64> %{{.+}}, <2 x i64> %{{.+}})
+ // CHECK-NEXT: ret <2 x i64>
+ return vec_dive(vulla, vullb);
+}
+
+vector signed int test_vec_mulh_si(void) {
+ // CHECK: @llvm.ppc.altivec.vmulhsw(<4 x i32> %{{.+}}, <4 x i32> %{{.+}})
+ // CHECK-NEXT: ret <4 x i32>
+ return vec_mulh(vsia, vsib);
+}
+
+vector unsigned int test_vec_mulh_ui(void) {
+ // CHECK: @llvm.ppc.altivec.vmulhuw(<4 x i32> %{{.+}}, <4 x i32> %{{.+}})
+ // CHECK-NEXT: ret <4 x i32>
+ return vec_mulh(vuia, vuib);
+}
+
+vector signed long long test_vec_mulh_sll(void) {
+ // CHECK: @llvm.ppc.altivec.vmulhsd(<2 x i64> %{{.+}}, <2 x i64> %{{.+}})
+ // CHECK-NEXT: ret <2 x i64>
+ return vec_mulh(vslla, vsllb);
+}
+
+vector unsigned long long test_vec_mulh_ull(void) {
+ // CHECK: @llvm.ppc.altivec.vmulhud(<2 x i64> %{{.+}}, <2 x i64> %{{.+}})
+ // CHECK-NEXT: ret <2 x i64>
+ return vec_mulh(vulla, vullb);
+}
+
vector signed int test_vec_mod_si(void) {
// CHECK: srem <4 x i32>
// CHECK-NEXT: ret <4 x i32>
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index b2e542994de9..2ff045865bb7 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -950,6 +950,18 @@ def int_ppc_altivec_vrldmi :
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
+// Vector Divide Extended Intrinsics: the <4 x i32> forms use the WWW class, the <2 x i64> forms use the DDD class.
+def int_ppc_altivec_vdivesw : PowerPC_Vec_WWW_Intrinsic<"vdivesw">;
+def int_ppc_altivec_vdiveuw : PowerPC_Vec_WWW_Intrinsic<"vdiveuw">;
+def int_ppc_altivec_vdivesd : PowerPC_Vec_DDD_Intrinsic<"vdivesd">;
+def int_ppc_altivec_vdiveud : PowerPC_Vec_DDD_Intrinsic<"vdiveud">;
+
+// Vector Multiply High Intrinsics: the <4 x i32> forms use the WWW class, the <2 x i64> forms use the DDD class.
+def int_ppc_altivec_vmulhsw : PowerPC_Vec_WWW_Intrinsic<"vmulhsw">;
+def int_ppc_altivec_vmulhuw : PowerPC_Vec_WWW_Intrinsic<"vmulhuw">;
+def int_ppc_altivec_vmulhsd : PowerPC_Vec_DDD_Intrinsic<"vmulhsd">;
+def int_ppc_altivec_vmulhud : PowerPC_Vec_DDD_Intrinsic<"vmulhud">;
+
//===----------------------------------------------------------------------===//
// PowerPC VSX Intrinsic Definitions.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index 08ff5435f887..491d969861e1 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1207,13 +1207,21 @@ let Predicates = [IsISA3_1] in {
"vdivud $vD, $vA, $vB", IIC_VecGeneral,
[(set v2i64:$vD, (udiv v2i64:$vA, v2i64:$vB))]>;
def VDIVESW : VXForm_1<907, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vdivesw $vD, $vA, $vB", IIC_VecGeneral, []>;
+ "vdivesw $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (int_ppc_altivec_vdivesw v4i32:$vA,
+ v4i32:$vB))]>;
def VDIVEUW : VXForm_1<651, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vdiveuw $vD, $vA, $vB", IIC_VecGeneral, []>;
+ "vdiveuw $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (int_ppc_altivec_vdiveuw v4i32:$vA,
+ v4i32:$vB))]>;
def VDIVESD : VXForm_1<971, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vdivesd $vD, $vA, $vB", IIC_VecGeneral, []>;
+ "vdivesd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (int_ppc_altivec_vdivesd v2i64:$vA,
+ v2i64:$vB))]>;
def VDIVEUD : VXForm_1<715, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vdiveud $vD, $vA, $vB", IIC_VecGeneral, []>;
+ "vdiveud $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (int_ppc_altivec_vdiveud v2i64:$vA,
+ v2i64:$vB))]>;
def XVTLSBB : XX2_BF3_XO5_XB6_XO9<60, 2, 475, (outs crrc:$BF), (ins vsrc:$XB),
"xvtlsbb $BF, $XB", IIC_VecGeneral, []>;
@@ -1285,6 +1293,15 @@ let Predicates = [IsISA3_1] in {
//---------------------------- Anonymous Patterns ----------------------------//
let Predicates = [IsISA3_1] in {
+ // Select the vector multiply high instructions (VMULH[S|U][W|D]) for the matching int_ppc_altivec_vmulh* intrinsics; operands pass through unchanged.
+ def : Pat<(v4i32 (int_ppc_altivec_vmulhsw v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VMULHSW $vA, $vB))>;
+ def : Pat<(v4i32 (int_ppc_altivec_vmulhuw v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VMULHUW $vA, $vB))>;
+ def : Pat<(v2i64 (int_ppc_altivec_vmulhsd v2i64:$vA, v2i64:$vB)),
+ (v2i64 (VMULHSD $vA, $vB))>;
+ def : Pat<(v2i64 (int_ppc_altivec_vmulhud v2i64:$vA, v2i64:$vB)),
+ (v2i64 (VMULHUD $vA, $vB))>;
def : Pat<(v16i8 (int_ppc_vsx_xxgenpcvbm v16i8:$VRB, imm:$IMM)),
(v16i8 (COPY_TO_REGCLASS (XXGENPCVBM $VRB, imm:$IMM), VRRC))>;
def : Pat<(v8i16 (int_ppc_vsx_xxgenpcvhm v8i16:$VRB, imm:$IMM)),
diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll b/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
index 4ecc3a17fedb..dc21b4fb49ee 100644
--- a/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
@@ -49,3 +49,49 @@ entry:
%div = sdiv <4 x i32> %a, %b
ret <4 x i32> %div
}
+
+; Test the vector divide extended intrinsics.
+declare <4 x i32> @llvm.ppc.altivec.vdivesw(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.ppc.altivec.vdiveuw(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.ppc.altivec.vdivesd(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.ppc.altivec.vdiveud(<2 x i64>, <2 x i64>)
+
+define <4 x i32> @test_vdivesw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vdivesw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vdivesw v2, v2, v3
+; CHECK-NEXT: blr
+entry:
+ %div = tail call <4 x i32> @llvm.ppc.altivec.vdivesw(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %div
+}
+
+define <4 x i32> @test_vdiveuw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vdiveuw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vdiveuw v2, v2, v3
+; CHECK-NEXT: blr
+entry:
+ %div = tail call <4 x i32> @llvm.ppc.altivec.vdiveuw(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %div
+}
+
+define <2 x i64> @test_vdivesd(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vdivesd:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vdivesd v2, v2, v3
+; CHECK-NEXT: blr
+entry:
+ %div = tail call <2 x i64> @llvm.ppc.altivec.vdivesd(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %div
+}
+
+define <2 x i64> @test_vdiveud(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vdiveud:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vdiveud v2, v2, v3
+; CHECK-NEXT: blr
+entry:
+ %div = tail call <2 x i64> @llvm.ppc.altivec.vdiveud(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %div
+}
diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll b/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
index 75c6d8c24038..fd58654d0ae1 100644
--- a/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
@@ -76,3 +76,49 @@ entry:
%tr = trunc <4 x i64> %shr to <4 x i32>
ret <4 x i32> %tr
}
+
+; Test the vector multiply high intrinsics.
+declare <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.ppc.altivec.vmulhsd(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64>, <2 x i64>)
+
+define <4 x i32> @test_vmulhsw_intrinsic(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmulhsw_intrinsic:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmulhsw v2, v2, v3
+; CHECK-NEXT: blr
+entry:
+ %mulh = tail call <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %mulh
+}
+
+define <4 x i32> @test_vmulhuw_intrinsic(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmulhuw_intrinsic:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmulhuw v2, v2, v3
+; CHECK-NEXT: blr
+entry:
+ %mulh = tail call <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %mulh
+}
+
+define <2 x i64> @test_vmulhsd_intrinsic(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmulhsd_intrinsic:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmulhsd v2, v2, v3
+; CHECK-NEXT: blr
+entry:
+ %mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhsd(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %mulh
+}
+
+define <2 x i64> @test_vmulhud_intrinsic(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmulhud_intrinsic:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmulhud v2, v2, v3
+; CHECK-NEXT: blr
+entry:
+ %mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %mulh
+}
More information about the llvm-commits
mailing list