[llvm] 2860734 - [PowerPC] Implement Vector Permute Extended Builtin

Lei Huang via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 2 12:53:42 PDT 2020


Author: Biplob Mishra
Date: 2020-07-02T14:53:18-05:00
New Revision: 286073484f7d36c8d0481be2a2f436f973389f54

URL: https://github.com/llvm/llvm-project/commit/286073484f7d36c8d0481be2a2f436f973389f54
DIFF: https://github.com/llvm/llvm-project/commit/286073484f7d36c8d0481be2a2f436f973389f54.diff

LOG: [PowerPC] Implement Vector Permute Extended Builtin

Implements the vector permute extended builtin vec_permx().

Differential Revision: https://reviews.llvm.org/D82869

Added: 
    

Modified: 
    clang/include/clang/Basic/BuiltinsPPC.def
    clang/lib/Headers/altivec.h
    clang/lib/Sema/SemaChecking.cpp
    clang/test/CodeGen/builtins-ppc-p10vector.c
    llvm/include/llvm/IR/IntrinsicsPowerPC.td
    llvm/lib/Target/PowerPC/PPCInstrPrefix.td
    llvm/test/CodeGen/PowerPC/builtins-ppc-p10permute.ll

Removed: 
    


################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 2a5ae08ea7b2..10f2f2cdf0ee 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -451,6 +451,9 @@ BUILTIN(__builtin_vsx_xxsldwi, "v.", "t")
 
 BUILTIN(__builtin_vsx_xxeval, "V2ULLiV2ULLiV2ULLiV2ULLiIi", "")
 
+// P10 Vector Permute Extended built-in.
+BUILTIN(__builtin_vsx_xxpermx, "V16UcV16UcV16UcV16UcIi", "")
+
 // Float 128 built-ins
 BUILTIN(__builtin_sqrtf128_round_to_odd, "LLdLLd", "")
 BUILTIN(__builtin_addf128_round_to_odd, "LLdLLdLLd", "")
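
The prototype string "V16UcV16UcV16UcV16UcIi" encodes a vector unsigned char
return type, three vector unsigned char operands, and an int that must be an
integer constant expression. A rough C-level reading of that encoding
(illustrative declaration only, not taken from the patch):

  // V16Uc = vector of 16 unsigned char, Ii = int constant expression.
  vector unsigned char
  __builtin_vsx_xxpermx(vector unsigned char a, vector unsigned char b,
                        vector unsigned char pcv, const int uim);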

diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 85f712f18ef3..220f4f88f8e3 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -16889,6 +16889,14 @@ vec_cnttzm(vector unsigned long long __a, vector unsigned long long __b) {
 
 #define vec_srdb(__a, __b, __c) __builtin_altivec_vsrdbi(__a, __b, (__c & 0x7))
 
+#ifdef __VSX__
+
+/* vec_permx */
+
+#define vec_permx(__a, __b, __c, __d)                                          \
+  __builtin_vsx_xxpermx((__a), (__b), (__c), (__d))
+
+#endif /* __VSX__ */
 #endif /* __POWER10_VECTOR__ */
 
 #undef __ATTRS_o_ai
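
A minimal usage sketch of the new interface (not part of the patch; assumes a
compiler and target where __VSX__ and __POWER10_VECTOR__ are defined):

  #include <altivec.h>

  // Select bytes from the two source vectors under control of pcv; the
  // last argument must be a compile-time constant in the range [0, 7].
  vector unsigned char permute_bytes(vector unsigned char a,
                                     vector unsigned char b,
                                     vector unsigned char pcv) {
    return vec_permx(a, b, pcv, 0);
  }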

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index a8d25bd7e240..672fe77a3359 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3132,6 +3132,8 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
      return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7);
   case PPC::BI__builtin_altivec_vsrdbi:
      return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7);
+  case PPC::BI__builtin_vsx_xxpermx:
+     return SemaBuiltinConstantArgRange(TheCall, 3, 0, 7);
   }
   return SemaBuiltinConstantArgRange(TheCall, i, l, u);
 }
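
With this check, the fourth argument of __builtin_vsx_xxpermx (and therefore
of vec_permx) must be an integer constant expression in [0, 7]; anything else
is diagnosed at compile time. Illustration (not one of the patch's tests):

  #include <altivec.h>

  vector unsigned char ok(vector unsigned char a, vector unsigned char b,
                          vector unsigned char pcv) {
    return vec_permx(a, b, pcv, 7);   // accepted: constant within [0, 7]
  }

  // vec_permx(a, b, pcv, 8), or a non-constant fourth argument, would be
  // rejected by the range check added above.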

diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
index efb63ce808cb..ecc470a289ce 100644
--- a/clang/test/CodeGen/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -14,6 +14,8 @@ vector unsigned int vuia, vuib, vuic;
 vector signed long long vslla, vsllb;
 vector unsigned long long vulla, vullb, vullc;
 vector unsigned __int128 vui128a, vui128b, vui128c;
+vector float vfa, vfb;
+vector double vda, vdb;
 unsigned int uia;
 
 vector unsigned long long test_vpdepd(void) {
@@ -257,3 +259,71 @@ vector unsigned long long test_vec_srdb_ull(void) {
   // CHECK-NEXT: ret <2 x i64>
   return vec_srdb(vulla, vullb, 7);
 }
+
+vector signed char test_vec_permx_sc(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_permx(vsca, vscb, vucc, 0);
+}
+
+vector unsigned char test_vec_permx_uc(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_permx(vuca, vucb, vucc, 1);
+}
+
+vector signed short test_vec_permx_ss(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <8 x i16>
+  // CHECK-NEXT: ret <8 x i16>
+  return vec_permx(vssa, vssb, vucc, 2);
+}
+
+vector unsigned short test_vec_permx_us(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <8 x i16>
+  // CHECK-NEXT: ret <8 x i16>
+  return vec_permx(vusa, vusb, vucc, 3);
+}
+
+vector signed int test_vec_permx_si(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_permx(vsia, vsib, vucc, 4);
+}
+
+vector unsigned int test_vec_permx_ui(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_permx(vuia, vuib, vucc, 5);
+}
+
+vector signed long long test_vec_permx_sll(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_permx(vslla, vsllb, vucc, 6);
+}
+
+vector unsigned long long test_vec_permx_ull(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_permx(vulla, vullb, vucc, 7);
+}
+
+vector float test_vec_permx_f(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <4 x float>
+  // CHECK-NEXT: ret <4 x float>
+  return vec_permx(vfa, vfb, vucc, 0);
+}
+
+vector double test_vec_permx_d(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <2 x double>
+  // CHECK-NEXT: ret <2 x double>
+  return vec_permx(vda, vdb, vucc, 1);
+}

diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 9ad9c6295200..797af23d3045 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1017,6 +1017,13 @@ def int_ppc_vsx_xxgenpcvwm :
 def int_ppc_vsx_xxgenpcvdm :
       PowerPC_VSX_Intrinsic<"xxgenpcvdm", [llvm_v2i64_ty],
                             [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+// P10 VSX Vector permute extended.
+def int_ppc_vsx_xxpermx : 
+      GCCBuiltin<"__builtin_vsx_xxpermx">,
+      Intrinsic<[llvm_v16i8_ty],
+                [llvm_v16i8_ty,llvm_v16i8_ty,llvm_v16i8_ty,llvm_i32_ty],
+                [IntrNoMem, ImmArg<ArgIndex<3>>]>;
 }
 
 //===----------------------------------------------------------------------===//

diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index d78bd485b6f7..8b9373fc9362 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -845,3 +845,11 @@ let AddedComplexity = 400, Predicates = [PrefixInstrs] in {
            (COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)),
                              VSFRC)>;
 }
+
+let Predicates = [PrefixInstrs] in {
+  def : Pat<(v16i8 (int_ppc_vsx_xxpermx v16i8:$A, v16i8:$B, v16i8:$C, timm:$D)),
+            (COPY_TO_REGCLASS (XXPERMX (COPY_TO_REGCLASS $A, VSRC),
+                                       (COPY_TO_REGCLASS $B, VSRC),
+                                       (COPY_TO_REGCLASS $C, VSRC), $D), VSRC)>;
+}
+

diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10permute.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10permute.ll
index ce4fa5b25b5a..ca73e56ff2e4 100644
--- a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10permute.ll
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10permute.ll
@@ -26,3 +26,14 @@ entry:
   ret <16 x i8> %0
 }
 declare <16 x i8> @llvm.ppc.altivec.vsrdbi(<16 x i8>, <16 x i8>, i32 immarg)
+
+define <16 x i8> @testXXPERMX(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: testXXPERMX:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxpermx v2, v2, v3, v4, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call <16 x i8> @llvm.ppc.vsx.xxpermx(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, i32 1)
+  ret <16 x i8> %0
+}
+declare <16 x i8> @llvm.ppc.vsx.xxpermx(<16 x i8>, <16 x i8>, <16 x i8>, i32 immarg)
