[clang] 0c2d872 - [PowerPC] Implement builtins for xvcvspbf16 and xvcvbf16spn
Amy Kwan via cfe-commits
cfe-commits at lists.llvm.org
Tue Sep 1 15:19:12 PDT 2020
Author: Amy Kwan
Date: 2020-09-01T17:16:43-05:00
New Revision: 0c2d872d5dec3eba10a8245bbcb3eebcf405ef9f
URL: https://github.com/llvm/llvm-project/commit/0c2d872d5dec3eba10a8245bbcb3eebcf405ef9f
DIFF: https://github.com/llvm/llvm-project/commit/0c2d872d5dec3eba10a8245bbcb3eebcf405ef9f.diff
LOG: [PowerPC] Implement builtins for xvcvspbf16 and xvcvbf16spn
This patch adds the builtin implementation for the xvcvspbf16 and xvcvbf16spn
instructions.
Differential Revision: https://reviews.llvm.org/D86795
Added:
llvm/test/CodeGen/PowerPC/bfloat16-outer-product.ll
Modified:
clang/include/clang/Basic/BuiltinsPPC.def
clang/test/CodeGen/builtins-ppc-p10vector.c
llvm/include/llvm/IR/IntrinsicsPowerPC.td
llvm/lib/Target/PowerPC/PPCInstrPrefix.td
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 9a33ba06d82e..b9824588939b 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -484,6 +484,9 @@ BUILTIN(__builtin_vsx_xvcvdpsp, "V4fV2d", "")
BUILTIN(__builtin_vsx_xvcvsphp, "V4fV4f", "")
BUILTIN(__builtin_vsx_xvcvhpsp, "V4fV8Us", "")
+BUILTIN(__builtin_vsx_xvcvspbf16, "V16UcV16Uc", "")
+BUILTIN(__builtin_vsx_xvcvbf16spn, "V16UcV16Uc", "")
+
// Vector Test Data Class builtins
BUILTIN(__builtin_vsx_xvtstdcdp, "V2ULLiV2dIi", "")
BUILTIN(__builtin_vsx_xvtstdcsp, "V4UiV4fIi", "")
diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
index 09477891bb06..6fe6d9fdf72d 100644
--- a/clang/test/CodeGen/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -137,6 +137,18 @@ vector unsigned long long test_vec_mod_ull(void) {
return vec_mod(vulla, vullb);
}
+vector unsigned char test_xvcvspbf16(vector unsigned char vc) {
+ // CHECK-LABEL: @test_xvcvspbf16(
+ // CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.ppc.vsx.xvcvspbf16(<16 x i8> [[VC:%.*]])
+ return __builtin_vsx_xvcvspbf16(vc);
+}
+
+vector unsigned char test_xvcvbf16spn(vector unsigned char vc) {
+ // CHECK-LABEL: @test_xvcvbf16spn(
+ // CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.ppc.vsx.xvcvbf16spn(<16 x i8> [[VC:%.*]])
+ return __builtin_vsx_xvcvbf16spn(vc);
+}
+
vector unsigned long long test_vpdepd(void) {
// CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
// CHECK-NEXT: ret <2 x i64>
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 2ff045865bb7..1ef44b735c9f 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1124,6 +1124,12 @@ def int_ppc_vsx_xvtstdcsp :
def int_ppc_vsx_xvcvhpsp :
PowerPC_VSX_Intrinsic<"xvcvhpsp", [llvm_v4f32_ty],
[llvm_v8i16_ty],[IntrNoMem]>;
+def int_ppc_vsx_xvcvspbf16 :
+ PowerPC_VSX_Intrinsic<"xvcvspbf16", [llvm_v16i8_ty],
+ [llvm_v16i8_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcvbf16spn :
+ PowerPC_VSX_Intrinsic<"xvcvbf16spn", [llvm_v16i8_ty],
+ [llvm_v16i8_ty], [IntrNoMem]>;
def int_ppc_vsx_xxextractuw :
PowerPC_VSX_Intrinsic<"xxextractuw",[llvm_v2i64_ty],
[llvm_v2i64_ty,llvm_i32_ty], [IntrNoMem]>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index fc4e57ec04f6..81455adbd0b7 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -515,6 +515,11 @@ def IsISA3_1 : Predicate<"Subtarget->isISA3_1()">;
def PairedVectorMemops : Predicate<"PPCSubTarget->pairedVectorMemops()">;
def MMA : Predicate<"PPCSubTarget->hasMMA()">;
+def RCCp {
+ dag AToVSRC = (COPY_TO_REGCLASS $XA, VSRC);
+ dag BToVSRC = (COPY_TO_REGCLASS $XB, VSRC);
+}
+
let Predicates = [PrefixInstrs] in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
defm PADDI8 :
@@ -1351,6 +1356,13 @@ let Predicates = [IsISA3_1] in {
(v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>;
}
+let Predicates = [IsISA3_1, HasVSX] in {
+ def : Pat<(v16i8 (int_ppc_vsx_xvcvspbf16 v16i8:$XA)),
+ (COPY_TO_REGCLASS (XVCVSPBF16 RCCp.AToVSRC), VRRC)>;
+ def : Pat<(v16i8 (int_ppc_vsx_xvcvbf16spn v16i8:$XA)),
+ (COPY_TO_REGCLASS (XVCVBF16SPN RCCp.AToVSRC), VRRC)>;
+}
+
let AddedComplexity = 400, Predicates = [IsISA3_1] in {
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$rS, 0)), xoaddr:$src),
(STXVRBX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
diff --git a/llvm/test/CodeGen/PowerPC/bfloat16-outer-product.ll b/llvm/test/CodeGen/PowerPC/bfloat16-outer-product.ll
new file mode 100644
index 000000000000..772235269667
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/bfloat16-outer-product.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+; Function Attrs: nofree nounwind writeonly
+define dso_local void @test60(i8* nocapture readnone %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
+; CHECK-LABEL: test60:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvcvspbf16 vs0, v2
+; CHECK-NEXT: stxv vs0, 0(r7)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test60:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: xvcvspbf16 vs0, v2
+; CHECK-BE-NEXT: stxv vs0, 0(r7)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = tail call <16 x i8> @llvm.ppc.vsx.xvcvspbf16(<16 x i8> %vc)
+ %1 = bitcast i8* %resp to <16 x i8>*
+ store <16 x i8> %0, <16 x i8>* %1, align 16
+ ret void
+}
+; Function Attrs: nounwind readnone
+declare <16 x i8> @llvm.ppc.vsx.xvcvspbf16(<16 x i8>)
+
+; Function Attrs: nofree nounwind writeonly
+define dso_local void @test61(i8* nocapture readnone %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
+; CHECK-LABEL: test61:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvcvbf16spn vs0, v2
+; CHECK-NEXT: stxv vs0, 0(r7)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test61:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: xvcvbf16spn vs0, v2
+; CHECK-BE-NEXT: stxv vs0, 0(r7)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = tail call <16 x i8> @llvm.ppc.vsx.xvcvbf16spn(<16 x i8> %vc)
+ %1 = bitcast i8* %resp to <16 x i8>*
+ store <16 x i8> %0, <16 x i8>* %1, align 16
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+declare <16 x i8> @llvm.ppc.vsx.xvcvbf16spn(<16 x i8>)
More information about the cfe-commits mailing list