[clang] c45c161 - [PowerPC][Power10] Implement Parallel Bits Deposit/Extract Builtins in LLVM/Clang

Amy Kwan via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 18 14:24:19 PDT 2020


Author: Amy Kwan
Date: 2020-06-18T16:23:56-05:00
New Revision: c45c1611303b4609016fa69c1c987ede3bf92006

URL: https://github.com/llvm/llvm-project/commit/c45c1611303b4609016fa69c1c987ede3bf92006
DIFF: https://github.com/llvm/llvm-project/commit/c45c1611303b4609016fa69c1c987ede3bf92006.diff

LOG: [PowerPC][Power10] Implement Parallel Bits Deposit/Extract Builtins in LLVM/Clang

This patch implements builtins for the following prototypes:

vector unsigned long long vec_pdep(vector unsigned long long, vector unsigned long long);
vector unsigned long long vec_pext(vector unsigned long long, vector unsigned long long);
unsigned long long __builtin_pdepd (unsigned long long, unsigned long long);
unsigned long long __builtin_pextd (unsigned long long, unsigned long long);

This revision depends on D80758.

Differential Revision: https://reviews.llvm.org/D80935

Added: 
    clang/test/CodeGen/builtins-ppc-p10.c
    clang/test/CodeGen/builtins-ppc-p10vector.c
    llvm/test/CodeGen/PowerPC/p10-bit-manip-ops.ll
    llvm/test/MC/Disassembler/PowerPC/p10insts.txt
    llvm/test/MC/PowerPC/p10.s

Modified: 
    clang/include/clang/Basic/BuiltinsPPC.def
    clang/lib/Headers/altivec.h
    llvm/include/llvm/IR/IntrinsicsPowerPC.td
    llvm/lib/Target/PowerPC/PPCInstrPrefix.td
    llvm/lib/Target/PowerPC/PPCScheduleP9.td

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 314e1cc05907..30077e2e8d03 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -298,6 +298,10 @@ BUILTIN(__builtin_altivec_vrldmi, "V2ULLiV2ULLiV2ULLiV2ULLi", "")
 BUILTIN(__builtin_altivec_vrlwnm, "V4UiV4UiV4Ui", "")
 BUILTIN(__builtin_altivec_vrldnm, "V2ULLiV2ULLiV2ULLi", "")
 
+// P10 Vector Parallel Bits built-ins.
+BUILTIN(__builtin_altivec_vpdepd, "V2ULLiV2ULLiV2ULLi", "")
+BUILTIN(__builtin_altivec_vpextd, "V2ULLiV2ULLiV2ULLi", "")
+
 // VSX built-ins.
 
 BUILTIN(__builtin_vsx_lxvd2x, "V2divC*", "")
@@ -470,6 +474,8 @@ BUILTIN(__builtin_divweu, "UiUiUi", "")
 BUILTIN(__builtin_divde, "SLLiSLLiSLLi", "")
 BUILTIN(__builtin_divdeu, "ULLiULLiULLi", "")
 BUILTIN(__builtin_bpermd, "SLLiSLLiSLLi", "")
+BUILTIN(__builtin_pdepd, "ULLiULLiULLi", "")
+BUILTIN(__builtin_pextd, "ULLiULLiULLi", "")
 
 // Vector int128 (un)pack
 BUILTIN(__builtin_unpack_vector_int128, "ULLiV1LLLii", "")

diff  --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 7e231a2a428e..1e1e57cd1ffc 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -16761,6 +16761,23 @@ static vector signed short __ATTRS_o_ai vec_nabs(vector signed short __a) {
 static vector signed char __ATTRS_o_ai vec_nabs(vector signed char __a) {
   return __builtin_altivec_vminsb(__a, -__a);
 }
+
+#ifdef __POWER10_VECTOR__
+/* vec_pdep */
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_pdep(vector unsigned long long __a, vector unsigned long long __b) {
+  return __builtin_altivec_vpdepd(__a, __b);
+}
+
+/* vec_pext */
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_pext(vector unsigned long long __a, vector unsigned long long __b) {
+  return __builtin_altivec_vpextd(__a, __b);
+}
+#endif /* __POWER10_VECTOR__ */
+
 #undef __ATTRS_o_ai
 
 #endif /* __ALTIVEC_H */

diff  --git a/clang/test/CodeGen/builtins-ppc-p10.c b/clang/test/CodeGen/builtins-ppc-p10.c
new file mode 100644
index 000000000000..c21e8026d0c9
--- /dev/null
+++ b/clang/test/CodeGen/builtins-ppc-p10.c
@@ -0,0 +1,15 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -target-cpu pwr10 \
+// RUN: -emit-llvm %s -o - | FileCheck %s
+
+unsigned long long ulla, ullb;
+
+unsigned long long test_pdepd(void) {
+  // CHECK: @llvm.ppc.pdepd
+  return __builtin_pdepd(ulla, ullb);
+}
+
+unsigned long long test_pextd(void) {
+  // CHECK: @llvm.ppc.pextd
+  return __builtin_pextd(ulla, ullb);
+}

diff  --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
new file mode 100644
index 000000000000..31c24f382f1e
--- /dev/null
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -0,0 +1,20 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -target-feature +vsx -target-feature +altivec \
+// RUN:   -target-cpu pwr10 -triple powerpc64le-unknown-unknown -emit-llvm %s \
+// RUN:   -o - | FileCheck %s
+
+#include <altivec.h>
+
+vector unsigned long long vulla, vullb;
+
+vector unsigned long long test_vpdepd(void) {
+  // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_pdep(vulla, vullb);
+}
+
+vector unsigned long long test_vpextd(void) {
+  // CHECK: @llvm.ppc.altivec.vpextd(<2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_pext(vulla, vullb);
+}

diff  --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 8e4bfed83e00..79a3221f0141 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -60,6 +60,14 @@ let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
                        Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
                                  [IntrNoMem]>;
 
+  // Parallel Bits Deposit/Extract Doubleword Builtins.
+  def int_ppc_pdepd
+      : GCCBuiltin<"__builtin_pdepd">,
+        Intrinsic <[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+  def int_ppc_pextd
+      : GCCBuiltin<"__builtin_pextd">,
+        Intrinsic <[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+
   def int_ppc_truncf128_round_to_odd
       : GCCBuiltin<"__builtin_truncf128_round_to_odd">,
         Intrinsic <[llvm_double_ty], [llvm_f128_ty], [IntrNoMem]>;
@@ -402,6 +410,13 @@ let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
   def int_ppc_altivec_vprtybq : GCCBuiltin<"__builtin_altivec_vprtybq">,
               Intrinsic<[llvm_v1i128_ty],[llvm_v1i128_ty],[IntrNoMem]>;
 
+  // P10 Vector Parallel Bits Deposit/Extract Doubleword Builtins.
+  def int_ppc_altivec_vpdepd : GCCBuiltin<"__builtin_altivec_vpdepd">,
+              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+                        [IntrNoMem]>;
+  def int_ppc_altivec_vpextd : GCCBuiltin<"__builtin_altivec_vpextd">,
+              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+                        [IntrNoMem]>;
 }
 
 // Vector average.

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index a4a9688e3de7..a90cba09c619 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -191,6 +191,7 @@ multiclass 8LS_DForm_R_SI34_XT6_RA5_p<bits<5> opcode, dag OOL, dag IOL,
 }
 
 def PrefixInstrs : Predicate<"PPCSubTarget->hasPrefixInstrs()">;
+def IsISA3_1 : Predicate<"PPCSubTarget->isISA3_1()">;
 
 let Predicates = [PrefixInstrs] in {
   let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
@@ -500,3 +501,19 @@ let Predicates = [PCRelativeMemops], AddedComplexity = 500 in {
   def : Pat<(PPCmatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
 }
 
+let Predicates = [IsISA3_1] in {
+   def VPDEPD : VXForm_1<1485, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+                         "vpdepd $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v2i64:$vD,
+                         (int_ppc_altivec_vpdepd v2i64:$vA, v2i64:$vB))]>;
+   def VPEXTD : VXForm_1<1421, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+                         "vpextd $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v2i64:$vD,
+                         (int_ppc_altivec_vpextd v2i64:$vA, v2i64:$vB))]>;
+   def PDEPD : XForm_6<31, 156, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+                       "pdepd $rA, $rS, $rB", IIC_IntGeneral,
+                       [(set i64:$rA, (int_ppc_pdepd i64:$rS, i64:$rB))]>;
+   def PEXTD : XForm_6<31, 188, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+                       "pextd $rA, $rS, $rB", IIC_IntGeneral,
+                       [(set i64:$rA, (int_ppc_pextd i64:$rS, i64:$rB))]>;
+}

diff  --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td
index f4309dbde482..0a1ae7e55b3c 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td
@@ -41,8 +41,10 @@ def P9Model : SchedMachineModel {
   let CompleteModel = 1;
 
   // Do not support QPX (Quad Processing eXtension), SPE (Signal Processing
-  // Engine), prefixed instructions on Power 9 or PC relative mem ops.
-  let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs, PCRelativeMemops];
+  // Engine), prefixed instructions on Power 9, PC relative mem ops, or
+  // instructions introduced in ISA 3.1.
+  let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs, PCRelativeMemops,
+                             IsISA3_1];
 
 }
 

diff  --git a/llvm/test/CodeGen/PowerPC/p10-bit-manip-ops.ll b/llvm/test/CodeGen/PowerPC/p10-bit-manip-ops.ll
new file mode 100644
index 000000000000..fc2ebf89079f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/p10-bit-manip-ops.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; These test cases aim to test the bit manipulation operations on Power10.
+
+declare <2 x i64> @llvm.ppc.altivec.vpdepd(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.ppc.altivec.vpextd(<2 x i64>, <2 x i64>)
+declare i64 @llvm.ppc.pdepd(i64, i64)
+declare i64 @llvm.ppc.pextd(i64, i64)
+
+define <2 x i64> @test_vpdepd(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vpdepd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vpdepd v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vpdepd(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @test_vpextd(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vpextd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vpextd v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vpextd(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %tmp
+}
+
+define i64 @test_pdepd(i64 %a, i64 %b) {
+; CHECK-LABEL: test_pdepd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pdepd r3, r3, r4
+; CHECK-NEXT:    blr
+entry:
+  %tmp = tail call i64 @llvm.ppc.pdepd(i64 %a, i64 %b)
+  ret i64 %tmp
+}
+
+define i64 @test_pextd(i64 %a, i64 %b) {
+; CHECK-LABEL: test_pextd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pextd r3, r3, r4
+; CHECK-NEXT:    blr
+entry:
+  %tmp = tail call i64 @llvm.ppc.pextd(i64 %a, i64 %b)
+  ret i64 %tmp
+}

diff  --git a/llvm/test/MC/Disassembler/PowerPC/p10insts.txt b/llvm/test/MC/Disassembler/PowerPC/p10insts.txt
new file mode 100644
index 000000000000..ac95e30fbde5
--- /dev/null
+++ b/llvm/test/MC/Disassembler/PowerPC/p10insts.txt
@@ -0,0 +1,14 @@
+# RUN: llvm-mc --disassemble %s -triple powerpc64-unknown-linux-gnu \
+# RUN:   -mcpu=pwr10 | FileCheck %s
+
+# CHECK: vpdepd 1, 2, 0
+0x10 0x22 0x05 0xcd
+
+# CHECK: vpextd 1, 2, 0
+0x10 0x22 0x05 0x8d
+
+# CHECK: pdepd 1, 2, 4
+0x7c 0x41 0x21 0x38
+
+# CHECK: pextd 1, 2, 4
+0x7c 0x41 0x21 0x78

diff  --git a/llvm/test/MC/PowerPC/p10.s b/llvm/test/MC/PowerPC/p10.s
new file mode 100644
index 000000000000..d2b399c531b4
--- /dev/null
+++ b/llvm/test/MC/PowerPC/p10.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple powerpc64-unknown-linux-gnu --show-encoding %s | \
+# RUN:   FileCheck -check-prefix=CHECK-BE %s
+# RUN: llvm-mc -triple powerpc64le-unknown-linux-gnu --show-encoding %s | \
+# RUN:   FileCheck -check-prefix=CHECK-LE %s
+
+# CHECK-BE: vpdepd 1, 2, 0                        # encoding: [0x10,0x22,0x05,0xcd]
+# CHECK-LE: vpdepd 1, 2, 0                        # encoding: [0xcd,0x05,0x22,0x10]
+            vpdepd 1, 2, 0
+# CHECK-BE: vpextd 1, 2, 0                        # encoding: [0x10,0x22,0x05,0x8d]
+# CHECK-LE: vpextd 1, 2, 0                        # encoding: [0x8d,0x05,0x22,0x10]
+            vpextd 1, 2, 0
+# CHECK-BE: pdepd 1, 2, 4                         # encoding: [0x7c,0x41,0x21,0x38]
+# CHECK-LE: pdepd 1, 2, 4                         # encoding: [0x38,0x21,0x41,0x7c]
+            pdepd 1, 2, 4
+# CHECK-BE: pextd 1, 2, 4                         # encoding: [0x7c,0x41,0x21,0x78]
+# CHECK-LE: pextd 1, 2, 4                         # encoding: [0x78,0x21,0x41,0x7c]
+            pextd 1, 2, 4


        


More information about the cfe-commits mailing list