[llvm] b314705 - [PowerPC] Implement the 128-bit Vector Divide Extended Builtins in Clang/LLVM

Tue Sep 22 09:32:29 PDT 2020

Author: Amy Kwan
Date: 2020-09-22T11:31:44-05:00
New Revision: b3147058dec7d42ae0284d6e6edf25eb762c8b89

URL: https://github.com/llvm/llvm-project/commit/b3147058dec7d42ae0284d6e6edf25eb762c8b89
DIFF: https://github.com/llvm/llvm-project/commit/b3147058dec7d42ae0284d6e6edf25eb762c8b89.diff

LOG: [PowerPC] Implement the 128-bit Vector Divide Extended Builtins in Clang/LLVM

This patch implements the 128-bit vector divide extended builtins in Clang/LLVM.
These builtins map to the vdivesq and vdiveuq instructions respectively.

Differential Revision: https://reviews.llvm.org/D87729

Added: 
    

Modified: 
    clang/include/clang/Basic/BuiltinsPPC.def
    clang/lib/Headers/altivec.h
    clang/test/CodeGen/builtins-ppc-p10vector.c
    llvm/include/llvm/IR/IntrinsicsPowerPC.td
    llvm/lib/Target/PowerPC/PPCInstrPrefix.td
    llvm/test/CodeGen/PowerPC/p10-vector-divide.ll

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 777932a2e910..dea547a6e121 100644

--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -315,6 +315,8 @@ BUILTIN(__builtin_altivec_vdivesw, "V4SiV4SiV4Si", "")
 BUILTIN(__builtin_altivec_vdiveuw, "V4UiV4UiV4Ui", "")
 BUILTIN(__builtin_altivec_vdivesd, "V2LLiV2LLiV2LLi", "")
 BUILTIN(__builtin_altivec_vdiveud, "V2ULLiV2ULLiV2ULLi", "")
+BUILTIN(__builtin_altivec_vdivesq, "V1SLLLiV1SLLLiV1SLLLi", "")
+BUILTIN(__builtin_altivec_vdiveuq, "V1ULLLiV1ULLLiV1ULLLi", "")
 
 // P10 Vector Multiply High built-ins.
 BUILTIN(__builtin_altivec_vmulhsw, "V4SiV4SiV4Si", "")

diff  --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 905d06bdae68..010a00e15b74 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -3366,6 +3366,16 @@ static __inline__ vector unsigned long long __ATTRS_o_ai
 vec_dive(vector unsigned long long __a, vector unsigned long long __b) {
   return __builtin_altivec_vdiveud(__a, __b);
 }
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_dive(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return __builtin_altivec_vdiveuq(__a, __b);
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_dive(vector signed __int128 __a, vector signed __int128 __b) {
+  return __builtin_altivec_vdivesq(__a, __b);
+}
 #endif
 
 #ifdef __POWER10_VECTOR__

diff  --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
index 8557446de800..2f96cdfd9d68 100644
--- a/clang/test/CodeGen/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -106,6 +106,18 @@ vector unsigned long long test_vec_dive_ull(void) {
   return vec_dive(vulla, vullb);
 }
 
+vector unsigned __int128 test_vec_dive_u128(void) {
+  // CHECK: @llvm.ppc.altivec.vdiveuq(<1 x i128> %{{.+}}, <1 x i128> %{{.+}})
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_dive(vui128a, vui128b);
+}
+
+vector signed __int128 test_vec_dive_s128(void) {
+  // CHECK: @llvm.ppc.altivec.vdivesq(<1 x i128> %{{.+}}, <1 x i128> %{{.+}})
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_dive(vsi128a, vsi128b);
+}
+
 vector signed int test_vec_mulh_si(void) {
   // CHECK: @llvm.ppc.altivec.vmulhsw(<4 x i32> %{{.+}}, <4 x i32> %{{.+}})
   // CHECK-NEXT: ret <4 x i32>

diff  --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index dc7fa581fd45..79cf3efbfac4 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1007,6 +1007,8 @@ def int_ppc_altivec_vdivesw : PowerPC_Vec_WWW_Intrinsic<"vdivesw">;
 def int_ppc_altivec_vdiveuw : PowerPC_Vec_WWW_Intrinsic<"vdiveuw">;
 def int_ppc_altivec_vdivesd : PowerPC_Vec_DDD_Intrinsic<"vdivesd">;
 def int_ppc_altivec_vdiveud : PowerPC_Vec_DDD_Intrinsic<"vdiveud">;
+def int_ppc_altivec_vdivesq : PowerPC_Vec_QQQ_Intrinsic<"vdivesq">;
+def int_ppc_altivec_vdiveuq : PowerPC_Vec_QQQ_Intrinsic<"vdiveuq">;
 
 // Vector Multiply High Intrinsics.
 def int_ppc_altivec_vmulhsw : PowerPC_Vec_WWW_Intrinsic<"vmulhsw">;

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index 351f08dadadb..114213321f35 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1427,9 +1427,13 @@ let Predicates = [IsISA3_1] in {
                         "vdivuq $vD, $vA, $vB", IIC_VecGeneral,
                         [(set v1i128:$vD, (udiv v1i128:$vA, v1i128:$vB))]>;
   def VDIVESQ : VXForm_1<779, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                         "vdivesq $vD, $vA, $vB", IIC_VecGeneral, []>;
+                         "vdivesq $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v1i128:$vD, (int_ppc_altivec_vdivesq v1i128:$vA,
+			       v1i128:$vB))]>;
   def VDIVEUQ : VXForm_1<523, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                         "vdiveuq $vD, $vA, $vB", IIC_VecGeneral, []>;
+                         "vdiveuq $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v1i128:$vD, (int_ppc_altivec_vdiveuq v1i128:$vA,
+			       v1i128:$vB))]>;
   def VCMPEQUQ : VCMP <455, "vcmpequq $vD, $vA, $vB" , v1i128>;
   def VCMPGTSQ : VCMP <903, "vcmpgtsq $vD, $vA, $vB" , v1i128>;
   def VCMPGTUQ : VCMP <647, "vcmpgtuq $vD, $vA, $vB" , v1i128>;

diff  --git a/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll b/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
index b5f36a78b2b2..7f46f06c9f5d 100644
--- a/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
@@ -9,6 +9,7 @@
 ; This test case aims to test the vector divide instructions on Power10.
 ; This includes the low order and extended versions of vector divide,
 ; that operate on signed and unsigned words and doublewords.
+; This also includes 128 bit vector divide instructions.
 
 define <2 x i64> @test_vdivud(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vdivud:
@@ -113,3 +114,25 @@ entry:
   %div = tail call <2 x i64> @llvm.ppc.altivec.vdiveud(<2 x i64> %a, <2 x i64> %b)
   ret <2 x i64> %div
 }
+
+declare <1 x i128> @llvm.ppc.altivec.vdivesq(<1 x i128>, <1 x i128>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vdiveuq(<1 x i128>, <1 x i128>) nounwind readnone
+
+define <1 x i128> @test_vdivesq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+; CHECK-LABEL: test_vdivesq:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vdivesq v2, v2, v3
+; CHECK-NEXT:    blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vdivesq(<1 x i128> %x, <1 x i128> %y)
+  ret <1 x i128> %tmp
+}
+
+
+define <1 x i128> @test_vdiveuq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+; CHECK-LABEL: test_vdiveuq:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vdiveuq v2, v2, v3
+; CHECK-NEXT:    blr
+  %tmp = call <1 x i128> @llvm.ppc.altivec.vdiveuq(<1 x i128> %x, <1 x i128> %y)
+  ret <1 x i128> %tmp
+}