[llvm] d7eb917 - [PowerPC] Implementation of 128-bit Binary Vector Mod and Sign Extend builtins

Albion Fung via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 22 23:19:01 PDT 2020


Author: Albion Fung
Date: 2020-09-23T01:18:14-05:00
New Revision: d7eb917a7cb793f49e16841fc24826b988dd5c8f

URL: https://github.com/llvm/llvm-project/commit/d7eb917a7cb793f49e16841fc24826b988dd5c8f
DIFF: https://github.com/llvm/llvm-project/commit/d7eb917a7cb793f49e16841fc24826b988dd5c8f.diff

LOG: [PowerPC] Implementation of 128-bit Binary Vector Mod and Sign Extend builtins

This patch implements the 128-bit binary vector modulo and sign-extend builtins for Power10, and adds the Power9 vector sign-extend builtins.

Differential Revision: https://reviews.llvm.org/D87394

Added: 
    llvm/test/CodeGen/PowerPC/p10-vector-sign-extend.ll
    llvm/test/CodeGen/PowerPC/p9-vector-sign-extend.ll

Modified: 
    clang/include/clang/Basic/BuiltinsPPC.def
    clang/lib/Headers/altivec.h
    clang/test/CodeGen/builtins-ppc-p10vector.c
    clang/test/CodeGen/builtins-ppc-p9vector.c
    llvm/include/llvm/IR/IntrinsicsPowerPC.td
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCInstrAltivec.td
    llvm/lib/Target/PowerPC/PPCInstrPrefix.td
    llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index b571454cfc7a..5de3584a2755 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -303,6 +303,16 @@ BUILTIN(__builtin_altivec_vrldmi, "V2ULLiV2ULLiV2ULLiV2ULLi", "")
 BUILTIN(__builtin_altivec_vrlwnm, "V4UiV4UiV4Ui", "")
 BUILTIN(__builtin_altivec_vrldnm, "V2ULLiV2ULLiV2ULLi", "")
 
+// P9 Vector extend sign builtins.
+BUILTIN(__builtin_altivec_vextsb2w, "V4SiV16Sc", "")
+BUILTIN(__builtin_altivec_vextsb2d, "V2SLLiV16Sc", "")
+BUILTIN(__builtin_altivec_vextsh2w, "V4SiV8Ss", "")
+BUILTIN(__builtin_altivec_vextsh2d, "V2SLLiV8Ss", "")
+BUILTIN(__builtin_altivec_vextsw2d, "V2SLLiV4Si", "")
+
+// P10 Vector extend sign builtins.
+BUILTIN(__builtin_altivec_vextsd2q, "V1SLLLiV2SLLi", "")
+
 // P10 Vector Extract with Mask built-ins.
 BUILTIN(__builtin_altivec_vextractbm, "UiV16Uc", "")
 BUILTIN(__builtin_altivec_vextracthm, "UiV8Us", "")

diff  --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 2c09e477bd3c..b07e45d3c5a5 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -3007,6 +3007,42 @@ static __inline__ vector double __ATTRS_o_ai vec_cpsgn(vector double __a,
 
 #define vec_vctuxs __builtin_altivec_vctuxs
 
+/* vec_signexti, vec_signextll, vec_signextq */
+
+#ifdef __POWER9_VECTOR__
+static __inline__ vector signed int __ATTRS_o_ai
+vec_signexti(vector signed char __a) {
+  return __builtin_altivec_vextsb2w(__a);
+}
+
+static __inline__ vector signed int __ATTRS_o_ai
+vec_signexti(vector signed short __a) {
+  return __builtin_altivec_vextsh2w(__a);
+}
+
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_signextll(vector signed char __a) {
+  return __builtin_altivec_vextsb2d(__a);
+}
+
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_signextll(vector signed short __a) {
+  return __builtin_altivec_vextsh2d(__a);
+}
+
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_signextll(vector signed int __a) {
+  return __builtin_altivec_vextsw2d(__a);
+}
+#endif
+
+#ifdef __POWER10_VECTOR__
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_signextq(vector signed long long __a) {
+  return __builtin_altivec_vextsd2q(__a);
+}
+#endif
+
 /* vec_signed */
 
 static __inline__ vector signed int __ATTRS_o_ai
@@ -17269,6 +17305,16 @@ vec_mod(vector unsigned long long __a, vector unsigned long long __b) {
   return __a % __b;
 }
 
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_mod(vector signed __int128 __a, vector signed __int128 __b) {
+  return __a % __b;
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_mod(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return  __a % __b;
+}
+
 /* vec_sldbi */
 
 #define vec_sldb(__a, __b, __c) __builtin_altivec_vsldbi(__a, __b, (__c & 0x7))

diff  --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
index 89f49adf28e9..b6788d783a5d 100644
--- a/clang/test/CodeGen/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -1481,3 +1481,21 @@ vector unsigned __int128 test_vec_xl_zext_i64(void) {
   // CHECK: ret <1 x i128>
   return vec_xl_zext(llb, ullap);
 }
+
+vector signed __int128 test_vec_signextq_s128(void) {
+    // CHECK: @llvm.ppc.altivec.vextsd2q(<2 x i64>
+    // CHECK-NEXT: ret <1 x i128>
+    return vec_signextq(vslla);
+}
+
+vector unsigned __int128 test_vec_mod_u128(void) {
+    // CHECK: urem <1 x i128>
+    // CHECK-NEXT: ret <1 x i128>
+    return vec_mod(vui128a, vui128b);
+}
+
+vector signed __int128 test_vec_mod_s128(void) {
+    // CHECK: srem <1 x i128>
+    // CHECK-NEXT: ret <1 x i128>
+    return vec_mod(vsi128a, vsi128b);
+}

diff  --git a/clang/test/CodeGen/builtins-ppc-p9vector.c b/clang/test/CodeGen/builtins-ppc-p9vector.c
index e920cb76f4d9..0fbcdc566253 100644
--- a/clang/test/CodeGen/builtins-ppc-p9vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p9vector.c
@@ -1227,3 +1227,32 @@ vector unsigned long long test119(void) {
   return vec_extract4b(vuca, -5);
 }
 
+vector signed int test_vec_signexti_si_sc(void) {
+    // CHECK: @llvm.ppc.altivec.vextsb2w(<16 x i8>
+    // CHECK-NEXT: ret <4 x i32>
+    return vec_signexti(vsca);
+}
+
+vector signed int test_vec_signexti_si_ss(void) {
+    // CHECK: @llvm.ppc.altivec.vextsh2w(<8 x i16>
+    // CHECK-NEXT: ret <4 x i32>
+    return vec_signexti(vssa);
+}
+
+vector signed long long test_vec_signextll_sll_sc(void) {
+    // CHECK: @llvm.ppc.altivec.vextsb2d(<16 x i8>
+    // CHECK-NEXT: ret <2 x i64>
+    return vec_signextll(vsca);
+}
+
+vector signed long long test_vec_signextll_sll_ss(void) {
+    // CHECK: @llvm.ppc.altivec.vextsh2d(<8 x i16>
+    // CHECK-NEXT: ret <2 x i64>
+    return vec_signextll(vssa);
+}
+
+vector signed long long test_vec_signextll_sll_si(void) {
+    // CHECK: @llvm.ppc.altivec.vextsw2d(<4 x i32>
+    // CHECK-NEXT: ret <2 x i64>
+    return vec_signextll(vsia);
+}

diff  --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 219c0d5660fc..f2655d403c9b 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -801,6 +801,20 @@ let TargetPrefix = "ppc" in {  // All PPC intrinsics start with "llvm.ppc.".
             Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty],
                       [IntrNoMem]>;
 
+  // Vector Sign Extension Instructions
+  def int_ppc_altivec_vextsb2w : GCCBuiltin<"__builtin_altivec_vextsb2w">,
+            Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vextsb2d : GCCBuiltin<"__builtin_altivec_vextsb2d">,
+            Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vextsh2w : GCCBuiltin<"__builtin_altivec_vextsh2w">,
+            Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vextsh2d : GCCBuiltin<"__builtin_altivec_vextsh2d">,
+            Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vextsw2d : GCCBuiltin<"__builtin_altivec_vextsw2d">,
+            Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vextsd2q : GCCBuiltin<"__builtin_altivec_vextsd2q">,
+            Intrinsic<[llvm_v1i128_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+
   // Other multiplies.
   def int_ppc_altivec_vmladduhm : GCCBuiltin<"__builtin_altivec_vmladduhm">,
             Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,

diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 706ccb60e1a6..88ace9e5e5d6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -888,6 +888,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       setOperationAction(ISD::SREM, MVT::v2i64, Legal);
       setOperationAction(ISD::UREM, MVT::v4i32, Legal);
       setOperationAction(ISD::SREM, MVT::v4i32, Legal);
+      setOperationAction(ISD::UREM, MVT::v1i128, Legal);
+      setOperationAction(ISD::SREM, MVT::v1i128, Legal);
       setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
       setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
     }

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 920eeed9d41f..3b65f4da0442 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1449,11 +1449,16 @@ def VCTZD : VX_VT5_EO5_VB5<1538, 31, "vctzd",
                            [(set v2i64:$vD, (cttz v2i64:$vB))]>;
 
 // Vector Extend Sign
-def VEXTSB2W : VX_VT5_EO5_VB5<1538, 16, "vextsb2w", []>;
-def VEXTSH2W : VX_VT5_EO5_VB5<1538, 17, "vextsh2w", []>;
-def VEXTSB2D : VX_VT5_EO5_VB5<1538, 24, "vextsb2d", []>;
-def VEXTSH2D : VX_VT5_EO5_VB5<1538, 25, "vextsh2d", []>;
-def VEXTSW2D : VX_VT5_EO5_VB5<1538, 26, "vextsw2d", []>;
+def VEXTSB2W : VX_VT5_EO5_VB5<1538, 16, "vextsb2w",
+                              [(set v4i32:$vD, (int_ppc_altivec_vextsb2w v16i8:$vB))]>;
+def VEXTSH2W : VX_VT5_EO5_VB5<1538, 17, "vextsh2w",
+                              [(set v4i32:$vD, (int_ppc_altivec_vextsh2w v8i16:$vB))]>;
+def VEXTSB2D : VX_VT5_EO5_VB5<1538, 24, "vextsb2d",
+                              [(set v2i64:$vD, (int_ppc_altivec_vextsb2d v16i8:$vB))]>;
+def VEXTSH2D : VX_VT5_EO5_VB5<1538, 25, "vextsh2d",
+                              [(set v2i64:$vD, (int_ppc_altivec_vextsh2d v8i16:$vB))]>;
+def VEXTSW2D : VX_VT5_EO5_VB5<1538, 26, "vextsw2d",
+                              [(set v2i64:$vD, (int_ppc_altivec_vextsw2d v4i32:$vB))]>;
 let isCodeGenOnly = 1 in {
   def VEXTSB2Ws : VX_VT5_EO5_VB5s<1538, 16, "vextsb2w", []>;
   def VEXTSH2Ws : VX_VT5_EO5_VB5s<1538, 17, "vextsh2w", []>;

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index 1f5f93c28b3a..9111d618fae7 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1449,11 +1449,14 @@ let Predicates = [IsISA3_1] in {
   def VCMPGTSQ_rec : VCMPo <903, "vcmpgtsq. $vD, $vA, $vB" , v1i128>;
   def VCMPGTUQ_rec : VCMPo <647, "vcmpgtuq. $vD, $vA, $vB" , v1i128>;
   def VMODSQ : VXForm_1<1803, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                        "vmodsq $vD, $vA, $vB", IIC_VecGeneral, []>;
+                        "vmodsq $vD, $vA, $vB", IIC_VecGeneral,
+                        [(set v1i128:$vD, (srem v1i128:$vA, v1i128:$vB))]>;
   def VMODUQ : VXForm_1<1547, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                        "vmoduq $vD, $vA, $vB", IIC_VecGeneral, []>;
+                        "vmoduq $vD, $vA, $vB", IIC_VecGeneral,
+                        [(set v1i128:$vD, (urem v1i128:$vA, v1i128:$vB))]>;
   def VEXTSD2Q : VXForm_RD5_XO5_RS5<1538, 27, (outs vrrc:$vD), (ins vrrc:$vB),
-                                    "vextsd2q $vD, $vB", IIC_VecGeneral, []>;
+                               "vextsd2q $vD, $vB", IIC_VecGeneral,
+                               [(set v1i128:$vD, (int_ppc_altivec_vextsd2q v2i64:$vB))]>;
   def VCMPUQ : VXForm_BF3_VAB5<257, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),
                                "vcmpuq $BF, $vA, $vB", IIC_VecGeneral, []>;
   def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),

diff  --git a/llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll b/llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll
index e4ef0380ae8b..94a8058338cd 100644
--- a/llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll
@@ -10,6 +10,28 @@
 ; The vector modulo instructions operate on signed and unsigned words
 ; and doublewords.
 
+; The vector modulo instructions operate on signed and unsigned words,
+; doublewords and 128-bit values.
+
+
+define <1 x i128> @test_vmodsq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+; CHECK-LABEL: test_vmodsq:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmodsq v2, v2, v3
+; CHECK-NEXT:    blr
+  %tmp = srem <1 x i128> %x, %y
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmoduq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+; CHECK-LABEL: test_vmoduq:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmoduq v2, v2, v3
+; CHECK-NEXT:    blr
+  %tmp = urem <1 x i128> %x, %y
+  ret <1 x i128> %tmp
+}
+
 define <2 x i64> @test_vmodud(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vmodud:
 ; CHECK:       # %bb.0: # %entry

diff  --git a/llvm/test/CodeGen/PowerPC/p10-vector-sign-extend.ll b/llvm/test/CodeGen/PowerPC/p10-vector-sign-extend.ll
new file mode 100644
index 000000000000..f4f68cb367fe
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-sign-extend.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; This test case aims to test vector sign extend builtins.
+
+declare <1 x i128> @llvm.ppc.altivec.vextsd2q(<2 x i64>) nounwind readnone
+
+define <1 x i128> @test_vextsd2q(<2 x i64> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsd2q:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextsd2q v2, v2
+; CHECK-NEXT:    blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vextsd2q(<2 x i64> %x)
+  ret <1 x i128> %tmp
+}

diff  --git a/llvm/test/CodeGen/PowerPC/p9-vector-sign-extend.ll b/llvm/test/CodeGen/PowerPC/p9-vector-sign-extend.ll
new file mode 100644
index 000000000000..36d6b4144271
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/p9-vector-sign-extend.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; This test case aims to test vector sign extend builtins.
+
+declare <4 x i32> @llvm.ppc.altivec.vextsb2w(<16 x i8>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vextsb2d(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.ppc.altivec.vextsh2w(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vextsh2d(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vextsw2d(<4 x i32>) nounwind readnone
+
+define <4 x i32> @test_vextsb2w(<16 x i8> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsb2w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextsb2w v2, v2
+; CHECK-NEXT:    blr
+  %tmp = tail call <4 x i32> @llvm.ppc.altivec.vextsb2w(<16 x i8> %x)
+  ret <4 x i32> %tmp
+}
+
+define <2 x i64> @test_vextsb2d(<16 x i8> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsb2d:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextsb2d v2, v2
+; CHECK-NEXT:    blr
+  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vextsb2d(<16 x i8> %x)
+  ret <2 x i64> %tmp
+}
+
+define <4 x i32> @test_vextsh2w(<8 x i16> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsh2w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextsh2w v2, v2
+; CHECK-NEXT:    blr
+  %tmp = tail call <4 x i32> @llvm.ppc.altivec.vextsh2w(<8 x i16> %x)
+  ret <4 x i32> %tmp
+}
+
+define <2 x i64> @test_vextsh2d(<8 x i16> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsh2d:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextsh2d v2, v2
+; CHECK-NEXT:    blr
+  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vextsh2d(<8 x i16> %x)
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @test_vextsw2d(<4 x i32> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsw2d:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextsw2d v2, v2
+; CHECK-NEXT:    blr
+  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vextsw2d(<4 x i32> %x)
+  ret <2 x i64> %tmp
+}


        


More information about the llvm-commits mailing list