[clang] 3136cbe - [PowerPC] Implement Vector Shift Builtins

Albion Fung via cfe-commits <cfe-commits at lists.llvm.org>
Wed Aug 12 16:27:11 PDT 2020


Author: Albion Fung
Date: 2020-08-12T18:26:58-05:00
New Revision: 3136cbe29e74e19e6cb71c5ce71e4b92a63d03d8

URL: https://github.com/llvm/llvm-project/commit/3136cbe29e74e19e6cb71c5ce71e4b92a63d03d8
DIFF: https://github.com/llvm/llvm-project/commit/3136cbe29e74e19e6cb71c5ce71e4b92a63d03d8.diff

LOG: [PowerPC] Implement Vector Shift Builtins

This patch implements the builtins for the vector shifts (shl, srl, sra) and
adds the appropriate test cases. The builtins use the vector shift quadword
instructions (vslq, vsrq, vsraq) introduced in ISA 3.1.

Differential Revision: https://reviews.llvm.org/D83338
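
For reference, a minimal usage sketch of the new overloads (the function
names below are illustrative and not part of the patch; it assumes a Power10
target, e.g. -mcpu=pwr10):

    /* Usage sketch (illustrative); requires altivec.h from this patch. */
    #include <altivec.h>

    vector unsigned __int128
    shift_left(vector unsigned __int128 a, vector unsigned __int128 b) {
      /* The header reduces the shift amount modulo 128; lowers to vslq. */
      return vec_sl(a, b);
    }

    vector signed __int128
    shift_right_algebraic(vector signed __int128 a,
                          vector unsigned __int128 b) {
      /* Arithmetic shift that preserves the sign bit; lowers to vsraq. */
      return vec_sra(a, b);
    }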

Added: 
    llvm/test/CodeGen/PowerPC/p10-vector-shift.ll

Modified: 
    clang/lib/Headers/altivec.h
    clang/test/CodeGen/builtins-ppc-p10vector.c
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCInstrPrefix.td

Removed: 
    


################################################################################
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index d0a3b198351c..ac4182613cdd 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -17321,6 +17321,53 @@ vec_test_lsbb_all_zeros(vector unsigned char __a) {
   return __builtin_vsx_xvtlsbb(__a, 0);
 }
 #endif /* __VSX__ */
+
+/* vs[l | r | ra] */
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_sl(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return __a << (__b % (vector unsigned __int128)(sizeof(unsigned __int128) *
+                                                  __CHAR_BIT__));
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_sl(vector signed __int128 __a, vector unsigned __int128 __b) {
+  return __a << (__b % (vector unsigned __int128)(sizeof(unsigned __int128) *
+                                                  __CHAR_BIT__));
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_sr(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return __a >> (__b % (vector unsigned __int128)(sizeof(unsigned __int128) *
+                                                  __CHAR_BIT__));
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_sr(vector signed __int128 __a, vector unsigned __int128 __b) {
+  return (
+      vector signed __int128)(((vector unsigned __int128)__a) >>
+                              (__b %
+                               (vector unsigned __int128)(sizeof(
+                                                              unsigned __int128) *
+                                                          __CHAR_BIT__)));
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_sra(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return (
+      vector unsigned __int128)(((vector signed __int128)__a) >>
+                                (__b %
+                                 (vector unsigned __int128)(sizeof(
+                                                                unsigned __int128) *
+                                                            __CHAR_BIT__)));
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_sra(vector signed __int128 __a, vector unsigned __int128 __b) {
+  return __a >> (__b % (vector unsigned __int128)(sizeof(unsigned __int128) *
+                                                  __CHAR_BIT__));
+}
+
 #endif /* __POWER10_VECTOR__ */
 
 #undef __ATTRS_o_ai

diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
index 2279d9c57c86..a575f5a924c5 100644
--- a/clang/test/CodeGen/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -8,6 +8,7 @@
 
 #include <altivec.h>
 
+vector signed __int128 vi128a;
 vector signed char vsca, vscb;
 vector unsigned char vuca, vucb, vucc;
 vector signed short vssa, vssb;
@@ -778,6 +779,49 @@ void test_vec_xst_trunc_ull(vector unsigned __int128 __a, signed long long __b,
   vec_xst_trunc(__a, __b, __c);
 }
 
+vector unsigned __int128 test_vec_slq_unsigned (void) {
+  // CHECK-LABEL: test_vec_slq_unsigned
+  // CHECK: shl <1 x i128> %{{.+}}, %{{.+}}
+  // CHECK: ret <1 x i128> %{{.+}}
+  return vec_sl(vui128a, vui128b);
+}
+
+vector signed __int128 test_vec_slq_signed (void) {
+  // CHECK-LABEL: test_vec_slq_signed
+  // CHECK: shl <1 x i128> %{{.+}}, %{{.+}}
+  // CHECK: ret <1 x i128>
+  return vec_sl(vi128a, vui128a);
+}
+
+vector unsigned __int128 test_vec_srq_unsigned (void) {
+  // CHECK-LABEL: test_vec_srq_unsigned
+  // CHECK: lshr <1 x i128> %{{.+}}, %{{.+}}
+  // CHECK: ret <1 x i128>
+  return vec_sr(vui128a, vui128b);
+}
+
+vector signed __int128 test_vec_srq_signed (void) {
+  // CHECK-LABEL: test_vec_srq_signed
+  // CHECK: lshr <1 x i128> %{{.+}}, %{{.+}}
+  // CHECK: ret <1 x i128>
+  return vec_sr(vi128a, vui128a);
+}
+
+vector unsigned __int128 test_vec_sraq_unsigned (void) {
+  // CHECK-LABEL: test_vec_sraq_unsigned
+  // CHECK: ashr <1 x i128> %{{.+}}, %{{.+}}
+  // CHECK: ret <1 x i128>
+  return vec_sra(vui128a, vui128b);
+}
+
+vector signed __int128 test_vec_sraq_signed (void) {
+  // CHECK-LABEL: test_vec_sraq_signed
+  // CHECK: ashr <1 x i128> %{{.+}}, %{{.+}}
+  // CHECK: ret <1 x i128>
+  return vec_sra(vi128a, vui128a);
+}
+
+
 int test_vec_test_lsbb_all_ones(void) {
   // CHECK: @llvm.ppc.vsx.xvtlsbb(<16 x i8> %{{.+}}, i32 1
   // CHECK-NEXT: ret i32

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 296d0922d16d..986fe4c6493f 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1128,6 +1128,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   if (Subtarget.has64BitSupport())
     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
 
+  if (Subtarget.isISA3_1())
+    setOperationAction(ISD::SRA, MVT::v1i128, Legal);
+
   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
 
   if (!isPPC64) {

diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index cb2997a04ef7..e86e7828c075 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1288,6 +1288,18 @@ let Predicates = [IsISA3_1] in {
             (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_lt)>;
   def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)),
             (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
+  def : Pat<(v1i128 (shl v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSLQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (PPCshl v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSLQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (srl v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSRQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (PPCsrl v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSRQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (sra v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (PPCsra v1i128:$VRA, v1i128:$VRB)),
+            (v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
 }
 
 let AddedComplexity = 400, Predicates = [IsISA3_1] in {

diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-shift.ll b/llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
new file mode 100644
index 000000000000..5055c02786ac
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; These test cases demonstrate that the vector shift quadword instructions
+; introduced within Power10 are correctly exploited.
+
+define dso_local <1 x i128> @test_vec_vslq(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vslq:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslq v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %rem = urem <1 x i128> %b, <i128 128>
+  %shl = shl <1 x i128> %a, %rem
+  ret <1 x i128> %shl
+}
+
+define dso_local <1 x i128> @test_vec_vsrq(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vsrq:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsrq v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %rem = urem <1 x i128> %b, <i128 128>
+  %shr = lshr <1 x i128> %a, %rem
+  ret <1 x i128> %shr
+}
+
+define dso_local <1 x i128> @test_vec_vsraq(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vsraq:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsraq v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %rem = urem <1 x i128> %b, <i128 128>
+  %shr = ashr <1 x i128> %a, %rem
+  ret <1 x i128> %shr
+}
+
+define dso_local <1 x i128> @test_vec_vslq2(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vslq2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslq v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %shl = shl <1 x i128> %a, %b
+  ret <1 x i128> %shl
+}
+
+define dso_local <1 x i128> @test_vec_vsrq2(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vsrq2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsrq v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %shr = lshr <1 x i128> %a, %b
+  ret <1 x i128> %shr
+}
+
+define dso_local <1 x i128> @test_vec_vsraq2(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vsraq2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsraq v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %shr = ashr <1 x i128> %a, %b
+  ret <1 x i128> %shr
+}


        

