[clang] 0c2d872 - [PowerPC] Implement builtins for xvcvspbf16 and xvcvbf16spn

Amy Kwan via cfe-commits cfe-commits at lists.llvm.org
Tue Sep 1 15:19:12 PDT 2020


Author: Amy Kwan
Date: 2020-09-01T17:16:43-05:00
New Revision: 0c2d872d5dec3eba10a8245bbcb3eebcf405ef9f

URL: https://github.com/llvm/llvm-project/commit/0c2d872d5dec3eba10a8245bbcb3eebcf405ef9f
DIFF: https://github.com/llvm/llvm-project/commit/0c2d872d5dec3eba10a8245bbcb3eebcf405ef9f.diff

LOG: [PowerPC] Implement builtins for xvcvspbf16 and xvcvbf16spn

This patch adds Clang builtins, the corresponding LLVM intrinsics, and the
instruction selection patterns for the Power10 (ISA 3.1) xvcvspbf16 and
xvcvbf16spn instructions.

Differential Revision: https://reviews.llvm.org/D86795

Added: 
    llvm/test/CodeGen/PowerPC/bfloat16-outer-product.ll

Modified: 
    clang/include/clang/Basic/BuiltinsPPC.def
    clang/test/CodeGen/builtins-ppc-p10vector.c
    llvm/include/llvm/IR/IntrinsicsPowerPC.td
    llvm/lib/Target/PowerPC/PPCInstrPrefix.td

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 9a33ba06d82e..b9824588939b 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -484,6 +484,9 @@ BUILTIN(__builtin_vsx_xvcvdpsp, "V4fV2d", "")
 BUILTIN(__builtin_vsx_xvcvsphp, "V4fV4f", "")
 BUILTIN(__builtin_vsx_xvcvhpsp, "V4fV8Us", "")
 
+BUILTIN(__builtin_vsx_xvcvspbf16, "V16UcV16Uc", "")
+BUILTIN(__builtin_vsx_xvcvbf16spn, "V16UcV16Uc", "")
+
 // Vector Test Data Class builtins
 BUILTIN(__builtin_vsx_xvtstdcdp, "V2ULLiV2dIi", "")
 BUILTIN(__builtin_vsx_xvtstdcsp, "V4UiV4fIi", "")

diff  --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
index 09477891bb06..6fe6d9fdf72d 100644
--- a/clang/test/CodeGen/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -137,6 +137,18 @@ vector unsigned long long test_vec_mod_ull(void) {
   return vec_mod(vulla, vullb);
 }
 
+vector unsigned char test_xvcvspbf16(vector unsigned char vc) {
+  // CHECK-LABEL: @test_xvcvspbf16(
+  // CHECK:    [[TMP0:%.*]] = call <16 x i8> @llvm.ppc.vsx.xvcvspbf16(<16 x i8> [[VC:%.*]])
+  return __builtin_vsx_xvcvspbf16(vc);
+}
+
+vector unsigned char test_xvcvbf16spn(vector unsigned char vc) {
+  // CHECK-LABEL: @test_xvcvbf16spn(
+  // CHECK:    [[TMP0:%.*]] = call <16 x i8> @llvm.ppc.vsx.xvcvbf16spn(<16 x i8> [[VC:%.*]])
+  return __builtin_vsx_xvcvbf16spn(vc);
+}
+
 vector unsigned long long test_vpdepd(void) {
   // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
   // CHECK-NEXT: ret <2 x i64>

diff  --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 2ff045865bb7..1ef44b735c9f 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1124,6 +1124,12 @@ def int_ppc_vsx_xvtstdcsp :
 def int_ppc_vsx_xvcvhpsp :
       PowerPC_VSX_Intrinsic<"xvcvhpsp", [llvm_v4f32_ty],
                             [llvm_v8i16_ty],[IntrNoMem]>;
+def int_ppc_vsx_xvcvspbf16 :
+      PowerPC_VSX_Intrinsic<"xvcvspbf16", [llvm_v16i8_ty],
+                            [llvm_v16i8_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcvbf16spn :
+      PowerPC_VSX_Intrinsic<"xvcvbf16spn", [llvm_v16i8_ty],
+                            [llvm_v16i8_ty], [IntrNoMem]>;
 def int_ppc_vsx_xxextractuw :
       PowerPC_VSX_Intrinsic<"xxextractuw",[llvm_v2i64_ty],
                             [llvm_v2i64_ty,llvm_i32_ty], [IntrNoMem]>;

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index fc4e57ec04f6..81455adbd0b7 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -515,6 +515,11 @@ def IsISA3_1 : Predicate<"Subtarget->isISA3_1()">;
 def PairedVectorMemops : Predicate<"PPCSubTarget->pairedVectorMemops()">;
 def MMA : Predicate<"PPCSubTarget->hasMMA()">;
 
+def RCCp {
+  dag AToVSRC = (COPY_TO_REGCLASS $XA, VSRC);
+  dag BToVSRC = (COPY_TO_REGCLASS $XB, VSRC);
+}
+
 let Predicates = [PrefixInstrs] in {
   let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
     defm PADDI8 :
@@ -1351,6 +1356,13 @@ let Predicates = [IsISA3_1] in {
              (v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>;
 }
 
+let Predicates = [IsISA3_1, HasVSX] in {
+  def : Pat<(v16i8 (int_ppc_vsx_xvcvspbf16 v16i8:$XA)),
+            (COPY_TO_REGCLASS (XVCVSPBF16 RCCp.AToVSRC), VRRC)>;
+  def : Pat<(v16i8 (int_ppc_vsx_xvcvbf16spn v16i8:$XA)),
+            (COPY_TO_REGCLASS (XVCVBF16SPN RCCp.AToVSRC), VRRC)>;
+}
+
 let AddedComplexity = 400, Predicates = [IsISA3_1] in {
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$rS, 0)), xoaddr:$src),
             (STXVRBX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;

diff  --git a/llvm/test/CodeGen/PowerPC/bfloat16-outer-product.ll b/llvm/test/CodeGen/PowerPC/bfloat16-outer-product.ll
new file mode 100644
index 000000000000..772235269667
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/bfloat16-outer-product.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+; Function Attrs: nofree nounwind writeonly
+define dso_local void @test60(i8* nocapture readnone %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
+; CHECK-LABEL: test60:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvcvspbf16 vs0, v2
+; CHECK-NEXT:    stxv vs0, 0(r7)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test60:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xvcvspbf16 vs0, v2
+; CHECK-BE-NEXT:    stxv vs0, 0(r7)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = tail call <16 x i8> @llvm.ppc.vsx.xvcvspbf16(<16 x i8> %vc)
+  %1 = bitcast i8* %resp to <16 x i8>*
+  store <16 x i8> %0, <16 x i8>* %1, align 16
+  ret void
+}
+; Function Attrs: nounwind readnone
+declare <16 x i8> @llvm.ppc.vsx.xvcvspbf16(<16 x i8>)
+
+; Function Attrs: nofree nounwind writeonly
+define dso_local void @test61(i8* nocapture readnone %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
+; CHECK-LABEL: test61:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvcvbf16spn vs0, v2
+; CHECK-NEXT:    stxv vs0, 0(r7)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test61:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xvcvbf16spn vs0, v2
+; CHECK-BE-NEXT:    stxv vs0, 0(r7)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = tail call <16 x i8> @llvm.ppc.vsx.xvcvbf16spn(<16 x i8> %vc)
+  %1 = bitcast i8* %resp to <16 x i8>*
+  store <16 x i8> %0, <16 x i8>* %1, align 16
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare <16 x i8> @llvm.ppc.vsx.xvcvbf16spn(<16 x i8>)


        


More information about the cfe-commits mailing list