[llvm] 2c3bc91 - [PowerPC] Implement Vector Count Mask Bits builtins in LLVM/Clang
Amy Kwan via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 17 16:40:03 PDT 2020
Author: Amy Kwan
Date: 2020-09-17T18:20:53-05:00
New Revision: 2c3bc918db35913437e9302b77b11c08eb3ea6e4
URL: https://github.com/llvm/llvm-project/commit/2c3bc918db35913437e9302b77b11c08eb3ea6e4
DIFF: https://github.com/llvm/llvm-project/commit/2c3bc918db35913437e9302b77b11c08eb3ea6e4.diff
LOG: [PowerPC] Implement Vector Count Mask Bits builtins in LLVM/Clang
This patch implements the vec_cntm function prototypes in altivec.h in order to
utilize the vector count mask bits instructions introduced in Power10.
Differential Revision: https://reviews.llvm.org/D82726
Added:
Modified:
clang/include/clang/Basic/BuiltinsPPC.def
clang/lib/Headers/altivec.h
clang/test/CodeGen/builtins-ppc-p10vector.c
llvm/include/llvm/IR/IntrinsicsPowerPC.td
llvm/lib/Target/PowerPC/PPCInstrPrefix.td
llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 89dd03075b28..4b97cbc09209 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -329,6 +329,12 @@ BUILTIN(__builtin_altivec_vexpandwm, "V4UiV4Ui", "")
BUILTIN(__builtin_altivec_vexpanddm, "V2ULLiV2ULLi", "")
BUILTIN(__builtin_altivec_vexpandqm, "V1ULLLiV1ULLLi", "")
+// P10 Vector Count with Mask built-ins.
+BUILTIN(__builtin_altivec_vcntmbb, "ULLiV16UcUi", "")
+BUILTIN(__builtin_altivec_vcntmbh, "ULLiV8UsUi", "")
+BUILTIN(__builtin_altivec_vcntmbw, "ULLiV4UiUi", "")
+BUILTIN(__builtin_altivec_vcntmbd, "ULLiV2ULLiUi", "")
+
// P10 Vector Parallel Bits built-ins.
BUILTIN(__builtin_altivec_vpdepd, "V2ULLiV2ULLiV2ULLi", "")
BUILTIN(__builtin_altivec_vpextd, "V2ULLiV2ULLiV2ULLi", "")
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 51fd3d21b5e1..32b161d82d8e 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -17080,6 +17080,18 @@ vec_expandm(vector unsigned __int128 __a) {
return __builtin_altivec_vexpandqm(__a);
}
+/* vec_cntm */
+
+#define vec_cntm(__a, __mp) \
+ _Generic((__a), vector unsigned char \
+ : __builtin_altivec_vcntmbb((__a), (unsigned int)(__mp)), \
+ vector unsigned short \
+ : __builtin_altivec_vcntmbh((__a), (unsigned int)(__mp)), \
+ vector unsigned int \
+ : __builtin_altivec_vcntmbw((__a), (unsigned int)(__mp)), \
+ vector unsigned long long \
+ : __builtin_altivec_vcntmbd((__a), (unsigned int)(__mp)))
+
/* vec_pdep */
static __inline__ vector unsigned long long __ATTRS_o_ai
diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
index 12ec3a6ab8f3..0f72c5b0146e 100644
--- a/clang/test/CodeGen/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -244,6 +244,30 @@ vector unsigned __int128 test_vec_expandm_u128(void) {
return vec_expandm(vui128a);
}
+unsigned long long test_vec_cntm_uc(void) {
+ // CHECK: @llvm.ppc.altivec.vcntmbb(<16 x i8> %{{.+}}, i32
+ // CHECK-NEXT: ret i64
+ return vec_cntm(vuca, 1);
+}
+
+unsigned long long test_vec_cntm_us(void) {
+ // CHECK: @llvm.ppc.altivec.vcntmbh(<8 x i16> %{{.+}}, i32
+ // CHECK-NEXT: ret i64
+ return vec_cntm(vusa, 0);
+}
+
+unsigned long long test_vec_cntm_ui(void) {
+ // CHECK: @llvm.ppc.altivec.vcntmbw(<4 x i32> %{{.+}}, i32
+ // CHECK-NEXT: ret i64
+ return vec_cntm(vuia, 1);
+}
+
+unsigned long long test_vec_cntm_ull(void) {
+ // CHECK: @llvm.ppc.altivec.vcntmbd(<2 x i64> %{{.+}}, i32
+ // CHECK-NEXT: ret i64
+ return vec_cntm(vulla, 0);
+}
+
unsigned long long test_vgnb_1(void) {
// CHECK: @llvm.ppc.altivec.vgnb(<1 x i128> %{{.+}}, i32 2)
// CHECK-NEXT: ret i64
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 73a49ec77f8b..34ef4b768e3b 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -467,6 +467,20 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vexpandqm : GCCBuiltin<"__builtin_altivec_vexpandqm">,
Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty], [IntrNoMem]>;
+ // P10 Vector Count with Mask intrinsics.
+ def int_ppc_altivec_vcntmbb : GCCBuiltin<"__builtin_altivec_vcntmbb">,
+ Intrinsic<[llvm_i64_ty], [llvm_v16i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_ppc_altivec_vcntmbh : GCCBuiltin<"__builtin_altivec_vcntmbh">,
+ Intrinsic<[llvm_i64_ty], [llvm_v8i16_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_ppc_altivec_vcntmbw : GCCBuiltin<"__builtin_altivec_vcntmbw">,
+ Intrinsic<[llvm_i64_ty], [llvm_v4i32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_ppc_altivec_vcntmbd : GCCBuiltin<"__builtin_altivec_vcntmbd">,
+ Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
// P10 Vector Parallel Bits Deposit/Extract Doubleword Builtins.
def int_ppc_altivec_vpdepd : GCCBuiltin<"__builtin_altivec_vpdepd">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index 4e951114b90f..815d0c74bdfb 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1046,19 +1046,23 @@ let Predicates = [IsISA3_1] in {
def VCNTMBB : VXForm_RD5_MP_VB5<1602, 12, (outs g8rc:$rD),
(ins vrrc:$vB, u1imm:$MP),
"vcntmbb $rD, $vB, $MP", IIC_VecGeneral,
- []>;
+ [(set i64:$rD, (int_ppc_altivec_vcntmbb
+ v16i8:$vB, timm:$MP))]>;
def VCNTMBH : VXForm_RD5_MP_VB5<1602, 13, (outs g8rc:$rD),
(ins vrrc:$vB, u1imm:$MP),
"vcntmbh $rD, $vB, $MP", IIC_VecGeneral,
- []>;
+ [(set i64:$rD, (int_ppc_altivec_vcntmbh
+ v8i16:$vB, timm:$MP))]>;
def VCNTMBW : VXForm_RD5_MP_VB5<1602, 14, (outs g8rc:$rD),
(ins vrrc:$vB, u1imm:$MP),
"vcntmbw $rD, $vB, $MP", IIC_VecGeneral,
- []>;
+ [(set i64:$rD, (int_ppc_altivec_vcntmbw
+ v4i32:$vB, timm:$MP))]>;
def VCNTMBD : VXForm_RD5_MP_VB5<1602, 15, (outs g8rc:$rD),
(ins vrrc:$vB, u1imm:$MP),
"vcntmbd $rD, $vB, $MP", IIC_VecGeneral,
- []>;
+ [(set i64:$rD, (int_ppc_altivec_vcntmbd
+ v2i64:$vB, timm:$MP))]>;
def VEXTDUBVLX : VAForm_1a<24, (outs vrrc:$vD),
(ins vrrc:$vA, vrrc:$vB, gprc:$rC),
"vextdubvlx $vD, $vA, $vB, $rC",
diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll b/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
index 637361f7b1c9..65e9abd657ad 100644
--- a/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
@@ -120,3 +120,48 @@ entry:
%exp = tail call <1 x i128> @llvm.ppc.altivec.vexpandqm(<1 x i128> %a)
ret <1 x i128> %exp
}
+
+declare i64 @llvm.ppc.altivec.vcntmbb(<16 x i8>, i32)
+declare i64 @llvm.ppc.altivec.vcntmbh(<8 x i16>, i32)
+declare i64 @llvm.ppc.altivec.vcntmbw(<4 x i32>, i32)
+declare i64 @llvm.ppc.altivec.vcntmbd(<2 x i64>, i32)
+
+define i64 @test_vcntmbb(<16 x i8> %a) {
+; CHECK-LABEL: test_vcntmbb:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcntmbb r3, v2, 1
+; CHECK-NEXT: blr
+entry:
+ %cnt = tail call i64 @llvm.ppc.altivec.vcntmbb(<16 x i8> %a, i32 1)
+ ret i64 %cnt
+}
+
+define i64 @test_vcntmbh(<8 x i16> %a) {
+; CHECK-LABEL: test_vcntmbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcntmbh r3, v2, 0
+; CHECK-NEXT: blr
+entry:
+ %cnt = tail call i64 @llvm.ppc.altivec.vcntmbh(<8 x i16> %a, i32 0)
+ ret i64 %cnt
+}
+
+define i64 @test_vcntmbw(<4 x i32> %a) {
+; CHECK-LABEL: test_vcntmbw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcntmbw r3, v2, 1
+; CHECK-NEXT: blr
+entry:
+ %cnt = tail call i64 @llvm.ppc.altivec.vcntmbw(<4 x i32> %a, i32 1)
+ ret i64 %cnt
+}
+
+define i64 @test_vcntmbd(<2 x i64> %a) {
+; CHECK-LABEL: test_vcntmbd:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcntmbd r3, v2, 0
+; CHECK-NEXT: blr
+entry:
+ %cnt = tail call i64 @llvm.ppc.altivec.vcntmbd(<2 x i64> %a, i32 0)
+ ret i64 %cnt
+}
More information about the llvm-commits mailing list