[llvm] r341259 - [X86] Add intrinsics for KTEST instructions.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 31 14:31:53 PDT 2018
Author: ctopper
Date: Fri Aug 31 14:31:53 2018
New Revision: 341259
URL: http://llvm.org/viewvc/llvm-project?rev=341259&view=rev
Log:
[X86] Add intrinsics for KTEST instructions.
These intrinsics use the same lowering as the PTEST intrinsics, but operate on vXi1 mask vectors.
New clang builtins will be accompanying them shortly.
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=341259&r1=341258&r2=341259&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Fri Aug 31 14:31:53 2018
@@ -2773,6 +2773,24 @@ let TargetPrefix = "x86" in {
Intrinsic<[llvm_v32i1_ty], [llvm_v32i1_ty, llvm_v32i1_ty], [IntrNoMem]>;
def int_x86_avx512_kadd_q :
Intrinsic<[llvm_v64i1_ty], [llvm_v64i1_ty, llvm_v64i1_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_ktestc_b :
+ Intrinsic<[llvm_i32_ty], [llvm_v8i1_ty, llvm_v8i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_ktestc_w :
+ Intrinsic<[llvm_i32_ty], [llvm_v16i1_ty, llvm_v16i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_ktestc_d :
+ Intrinsic<[llvm_i32_ty], [llvm_v32i1_ty, llvm_v32i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_ktestc_q :
+ Intrinsic<[llvm_i32_ty], [llvm_v64i1_ty, llvm_v64i1_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_ktestz_b :
+ Intrinsic<[llvm_i32_ty], [llvm_v8i1_ty, llvm_v8i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_ktestz_w :
+ Intrinsic<[llvm_i32_ty], [llvm_v16i1_ty, llvm_v16i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_ktestz_d :
+ Intrinsic<[llvm_i32_ty], [llvm_v32i1_ty, llvm_v32i1_ty], [IntrNoMem]>;
+ def int_x86_avx512_ktestz_q :
+ Intrinsic<[llvm_i32_ty], [llvm_v64i1_ty, llvm_v64i1_ty], [IntrNoMem]>;
}
// Conversion ops
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=341259&r1=341258&r2=341259&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Aug 31 14:31:53 2018
@@ -21297,6 +21297,14 @@ SDValue X86TargetLowering::LowerINTRINSI
// ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest
// or testp pattern and a setcc for the result.
+ case Intrinsic::x86_avx512_ktestc_b:
+ case Intrinsic::x86_avx512_ktestc_w:
+ case Intrinsic::x86_avx512_ktestc_d:
+ case Intrinsic::x86_avx512_ktestc_q:
+ case Intrinsic::x86_avx512_ktestz_b:
+ case Intrinsic::x86_avx512_ktestz_w:
+ case Intrinsic::x86_avx512_ktestz_d:
+ case Intrinsic::x86_avx512_ktestz_q:
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_sse41_ptestnzc:
@@ -21315,15 +21323,30 @@ SDValue X86TargetLowering::LowerINTRINSI
case Intrinsic::x86_avx_vtestz_pd_256:
case Intrinsic::x86_avx_vtestc_pd_256:
case Intrinsic::x86_avx_vtestnzc_pd_256: {
- bool IsTestPacked = false;
+ unsigned TestOpc = X86ISD::PTEST;
X86::CondCode X86CC;
switch (IntNo) {
default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
+ case Intrinsic::x86_avx512_ktestc_b:
+ case Intrinsic::x86_avx512_ktestc_w:
+ case Intrinsic::x86_avx512_ktestc_d:
+ case Intrinsic::x86_avx512_ktestc_q:
+ // CF = 1
+ TestOpc = X86ISD::KTEST;
+ X86CC = X86::COND_B;
+ break;
+ case Intrinsic::x86_avx512_ktestz_b:
+ case Intrinsic::x86_avx512_ktestz_w:
+ case Intrinsic::x86_avx512_ktestz_d:
+ case Intrinsic::x86_avx512_ktestz_q:
+ TestOpc = X86ISD::KTEST;
+ X86CC = X86::COND_E;
+ break;
case Intrinsic::x86_avx_vtestz_ps:
case Intrinsic::x86_avx_vtestz_pd:
case Intrinsic::x86_avx_vtestz_ps_256:
case Intrinsic::x86_avx_vtestz_pd_256:
- IsTestPacked = true;
+ TestOpc = X86ISD::TESTP;
LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_avx_ptestz_256:
@@ -21334,7 +21357,7 @@ SDValue X86TargetLowering::LowerINTRINSI
case Intrinsic::x86_avx_vtestc_pd:
case Intrinsic::x86_avx_vtestc_ps_256:
case Intrinsic::x86_avx_vtestc_pd_256:
- IsTestPacked = true;
+ TestOpc = X86ISD::TESTP;
LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_avx_ptestc_256:
@@ -21345,7 +21368,7 @@ SDValue X86TargetLowering::LowerINTRINSI
case Intrinsic::x86_avx_vtestnzc_pd:
case Intrinsic::x86_avx_vtestnzc_ps_256:
case Intrinsic::x86_avx_vtestnzc_pd_256:
- IsTestPacked = true;
+ TestOpc = X86ISD::TESTP;
LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestnzc:
case Intrinsic::x86_avx_ptestnzc_256:
@@ -21356,7 +21379,6 @@ SDValue X86TargetLowering::LowerINTRINSI
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
- unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
SDValue SetCC = getSETCC(X86CC, Test, dl, DAG);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll?rev=341259&r1=341258&r2=341259&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll Fri Aug 31 14:31:53 2018
@@ -58,6 +58,74 @@ entry:
}
declare <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1>, <64 x i1>)
+define i32 @test_x86_avx512_ktestc_d(<32 x i16> %A, <32 x i16> %B) {
+; CHECK-LABEL: test_x86_avx512_ktestc_d:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vptestmw %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc0]
+; CHECK-NEXT: vptestmw %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x26,0xc9]
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT: ktestd %k1, %k0 # encoding: [0xc4,0xe1,0xf9,0x99,0xc1]
+; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = icmp ne <32 x i16> %A, zeroinitializer
+ %2 = icmp ne <32 x i16> %B, zeroinitializer
+ %res = call i32 @llvm.x86.avx512.ktestc.d(<32 x i1> %1, <32 x i1> %2) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx512.ktestc.d(<32 x i1>, <32 x i1>) nounwind readnone
+
+define i32 @test_x86_avx512_ktestz_d(<32 x i16> %A, <32 x i16> %B) {
+; CHECK-LABEL: test_x86_avx512_ktestz_d:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vptestmw %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc0]
+; CHECK-NEXT: vptestmw %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x26,0xc9]
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT: ktestd %k1, %k0 # encoding: [0xc4,0xe1,0xf9,0x99,0xc1]
+; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = icmp ne <32 x i16> %A, zeroinitializer
+ %2 = icmp ne <32 x i16> %B, zeroinitializer
+ %res = call i32 @llvm.x86.avx512.ktestz.d(<32 x i1> %1, <32 x i1> %2) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx512.ktestz.d(<32 x i1>, <32 x i1>) nounwind readnone
+
+define i32 @test_x86_avx512_ktestc_q(<64 x i8> %A, <64 x i8> %B) {
+; CHECK-LABEL: test_x86_avx512_ktestc_q:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
+; CHECK-NEXT: vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT: ktestq %k1, %k0 # encoding: [0xc4,0xe1,0xf8,0x99,0xc1]
+; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = icmp ne <64 x i8> %A, zeroinitializer
+ %2 = icmp ne <64 x i8> %B, zeroinitializer
+ %res = call i32 @llvm.x86.avx512.ktestc.q(<64 x i1> %1, <64 x i1> %2) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx512.ktestc.q(<64 x i1>, <64 x i1>) nounwind readnone
+
+define i32 @test_x86_avx512_ktestz_q(<64 x i8> %A, <64 x i8> %B) {
+; CHECK-LABEL: test_x86_avx512_ktestz_q:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
+; CHECK-NEXT: vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT: ktestq %k1, %k0 # encoding: [0xc4,0xe1,0xf8,0x99,0xc1]
+; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = icmp ne <64 x i8> %A, zeroinitializer
+ %2 = icmp ne <64 x i8> %B, zeroinitializer
+ %res = call i32 @llvm.x86.avx512.ktestz.q(<64 x i1> %1, <64 x i1> %2) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx512.ktestz.q(<64 x i1>, <64 x i1>) nounwind readnone
+
define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_packs_epi32_rr_512:
; CHECK: # %bb.0:
Modified: llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll?rev=341259&r1=341258&r2=341259&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll Fri Aug 31 14:31:53 2018
@@ -48,6 +48,74 @@ entry:
}
declare <8 x i1> @llvm.x86.avx512.kadd.b(<8 x i1>, <8 x i1>)
+define i32 @test_x86_avx512_ktestc_w(<16 x i32> %A, <16 x i32> %B) {
+; CHECK-LABEL: test_x86_avx512_ktestc_w:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vptestmd %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
+; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x27,0xc9]
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT: ktestw %k1, %k0 # encoding: [0xc5,0xf8,0x99,0xc1]
+; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = icmp ne <16 x i32> %A, zeroinitializer
+ %2 = icmp ne <16 x i32> %B, zeroinitializer
+ %res = call i32 @llvm.x86.avx512.ktestc.w(<16 x i1> %1, <16 x i1> %2) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx512.ktestc.w(<16 x i1>, <16 x i1>) nounwind readnone
+
+define i32 @test_x86_avx512_ktestz_w(<16 x i32> %A, <16 x i32> %B) {
+; CHECK-LABEL: test_x86_avx512_ktestz_w:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vptestmd %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
+; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x27,0xc9]
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT: ktestw %k1, %k0 # encoding: [0xc5,0xf8,0x99,0xc1]
+; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = icmp ne <16 x i32> %A, zeroinitializer
+ %2 = icmp ne <16 x i32> %B, zeroinitializer
+ %res = call i32 @llvm.x86.avx512.ktestz.w(<16 x i1> %1, <16 x i1> %2) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx512.ktestz.w(<16 x i1>, <16 x i1>) nounwind readnone
+
+define i32 @test_x86_avx512_ktestc_b(<8 x i64> %A, <8 x i64> %B) {
+; CHECK-LABEL: test_x86_avx512_ktestc_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0]
+; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x27,0xc9]
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT: ktestb %k1, %k0 # encoding: [0xc5,0xf9,0x99,0xc1]
+; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = icmp ne <8 x i64> %A, zeroinitializer
+ %2 = icmp ne <8 x i64> %B, zeroinitializer
+ %res = call i32 @llvm.x86.avx512.ktestc.b(<8 x i1> %1, <8 x i1> %2) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx512.ktestc.b(<8 x i1>, <8 x i1>) nounwind readnone
+
+define i32 @test_x86_avx512_ktestz_b(<8 x i64> %A, <8 x i64> %B) {
+; CHECK-LABEL: test_x86_avx512_ktestz_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0]
+; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x27,0xc9]
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT: ktestb %k1, %k0 # encoding: [0xc5,0xf9,0x99,0xc1]
+; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = icmp ne <8 x i64> %A, zeroinitializer
+ %2 = icmp ne <8 x i64> %B, zeroinitializer
+ %res = call i32 @llvm.x86.avx512.ktestz.b(<8 x i1> %1, <8 x i1> %2) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx512.ktestz.b(<8 x i1>, <8 x i1>) nounwind readnone
+
declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32)
define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
More information about the llvm-commits mailing list