[llvm] r341259 - [X86] Add intrinsics for KTEST instructions.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 31 14:31:53 PDT 2018


Author: ctopper
Date: Fri Aug 31 14:31:53 2018
New Revision: 341259

URL: http://llvm.org/viewvc/llvm-project?rev=341259&view=rev
Log:
[X86] Add intrinsics for KTEST instructions.

These intrinsics share the lowering code used for the PTEST intrinsics, but operate on vXi1 mask vectors (v8i1, v16i1, v32i1 and v64i1) and emit a KTEST node instead of PTEST/TESTP.

New clang builtins that use these intrinsics will follow shortly.

Modified:
    llvm/trunk/include/llvm/IR/IntrinsicsX86.td
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll

Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=341259&r1=341258&r2=341259&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Fri Aug 31 14:31:53 2018
@@ -2773,6 +2773,24 @@ let TargetPrefix = "x86" in {
         Intrinsic<[llvm_v32i1_ty], [llvm_v32i1_ty, llvm_v32i1_ty], [IntrNoMem]>;
   def int_x86_avx512_kadd_q :
         Intrinsic<[llvm_v64i1_ty], [llvm_v64i1_ty, llvm_v64i1_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_ktestc_b :
+          Intrinsic<[llvm_i32_ty], [llvm_v8i1_ty, llvm_v8i1_ty], [IntrNoMem]>;
+  def int_x86_avx512_ktestc_w :
+          Intrinsic<[llvm_i32_ty], [llvm_v16i1_ty, llvm_v16i1_ty], [IntrNoMem]>;
+  def int_x86_avx512_ktestc_d :
+          Intrinsic<[llvm_i32_ty], [llvm_v32i1_ty, llvm_v32i1_ty], [IntrNoMem]>;
+  def int_x86_avx512_ktestc_q :
+          Intrinsic<[llvm_i32_ty], [llvm_v64i1_ty, llvm_v64i1_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_ktestz_b :
+          Intrinsic<[llvm_i32_ty], [llvm_v8i1_ty, llvm_v8i1_ty], [IntrNoMem]>;
+  def int_x86_avx512_ktestz_w :
+          Intrinsic<[llvm_i32_ty], [llvm_v16i1_ty, llvm_v16i1_ty], [IntrNoMem]>;
+  def int_x86_avx512_ktestz_d :
+          Intrinsic<[llvm_i32_ty], [llvm_v32i1_ty, llvm_v32i1_ty], [IntrNoMem]>;
+  def int_x86_avx512_ktestz_q :
+          Intrinsic<[llvm_i32_ty], [llvm_v64i1_ty, llvm_v64i1_ty], [IntrNoMem]>;
 }
 
 // Conversion ops

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=341259&r1=341258&r2=341259&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Aug 31 14:31:53 2018
@@ -21297,6 +21297,14 @@ SDValue X86TargetLowering::LowerINTRINSI
   // ptest and testp intrinsics. The intrinsic these come from are designed to
   // return an integer value, not just an instruction so lower it to the ptest
   // or testp pattern and a setcc for the result.
+  case Intrinsic::x86_avx512_ktestc_b:
+  case Intrinsic::x86_avx512_ktestc_w:
+  case Intrinsic::x86_avx512_ktestc_d:
+  case Intrinsic::x86_avx512_ktestc_q:
+  case Intrinsic::x86_avx512_ktestz_b:
+  case Intrinsic::x86_avx512_ktestz_w:
+  case Intrinsic::x86_avx512_ktestz_d:
+  case Intrinsic::x86_avx512_ktestz_q:
   case Intrinsic::x86_sse41_ptestz:
   case Intrinsic::x86_sse41_ptestc:
   case Intrinsic::x86_sse41_ptestnzc:
@@ -21315,15 +21323,30 @@ SDValue X86TargetLowering::LowerINTRINSI
   case Intrinsic::x86_avx_vtestz_pd_256:
   case Intrinsic::x86_avx_vtestc_pd_256:
   case Intrinsic::x86_avx_vtestnzc_pd_256: {
-    bool IsTestPacked = false;
+    unsigned TestOpc = X86ISD::PTEST;
     X86::CondCode X86CC;
     switch (IntNo) {
     default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
+    case Intrinsic::x86_avx512_ktestc_b:
+    case Intrinsic::x86_avx512_ktestc_w:
+    case Intrinsic::x86_avx512_ktestc_d:
+    case Intrinsic::x86_avx512_ktestc_q:
+      // CF = 1
+      TestOpc = X86ISD::KTEST;
+      X86CC = X86::COND_B;
+      break;
+    case Intrinsic::x86_avx512_ktestz_b:
+    case Intrinsic::x86_avx512_ktestz_w:
+    case Intrinsic::x86_avx512_ktestz_d:
+    case Intrinsic::x86_avx512_ktestz_q:
+      TestOpc = X86ISD::KTEST;
+      X86CC = X86::COND_E;
+      break;
     case Intrinsic::x86_avx_vtestz_ps:
     case Intrinsic::x86_avx_vtestz_pd:
     case Intrinsic::x86_avx_vtestz_ps_256:
     case Intrinsic::x86_avx_vtestz_pd_256:
-      IsTestPacked = true;
+      TestOpc = X86ISD::TESTP;
       LLVM_FALLTHROUGH;
     case Intrinsic::x86_sse41_ptestz:
     case Intrinsic::x86_avx_ptestz_256:
@@ -21334,7 +21357,7 @@ SDValue X86TargetLowering::LowerINTRINSI
     case Intrinsic::x86_avx_vtestc_pd:
     case Intrinsic::x86_avx_vtestc_ps_256:
     case Intrinsic::x86_avx_vtestc_pd_256:
-      IsTestPacked = true;
+      TestOpc = X86ISD::TESTP;
       LLVM_FALLTHROUGH;
     case Intrinsic::x86_sse41_ptestc:
     case Intrinsic::x86_avx_ptestc_256:
@@ -21345,7 +21368,7 @@ SDValue X86TargetLowering::LowerINTRINSI
     case Intrinsic::x86_avx_vtestnzc_pd:
     case Intrinsic::x86_avx_vtestnzc_ps_256:
     case Intrinsic::x86_avx_vtestnzc_pd_256:
-      IsTestPacked = true;
+      TestOpc = X86ISD::TESTP;
       LLVM_FALLTHROUGH;
     case Intrinsic::x86_sse41_ptestnzc:
     case Intrinsic::x86_avx_ptestnzc_256:
@@ -21356,7 +21379,6 @@ SDValue X86TargetLowering::LowerINTRINSI
 
     SDValue LHS = Op.getOperand(1);
     SDValue RHS = Op.getOperand(2);
-    unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
     SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
     SDValue SetCC = getSETCC(X86CC, Test, dl, DAG);
     return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);

Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll?rev=341259&r1=341258&r2=341259&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll Fri Aug 31 14:31:53 2018
@@ -58,6 +58,74 @@ entry:
 }
 declare <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1>, <64 x i1>)
 
+define i32 @test_x86_avx512_ktestc_d(<32 x i16> %A, <32 x i16> %B) {
+; CHECK-LABEL: test_x86_avx512_ktestc_d:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vptestmw %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc0]
+; CHECK-NEXT:    vptestmw %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x26,0xc9]
+; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT:    ktestd %k1, %k0 # encoding: [0xc4,0xe1,0xf9,0x99,0xc1]
+; CHECK-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %1 = icmp ne <32 x i16> %A, zeroinitializer
+  %2 = icmp ne <32 x i16> %B, zeroinitializer
+  %res = call i32 @llvm.x86.avx512.ktestc.d(<32 x i1> %1, <32 x i1> %2) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.avx512.ktestc.d(<32 x i1>, <32 x i1>) nounwind readnone
+
+define i32 @test_x86_avx512_ktestz_d(<32 x i16> %A, <32 x i16> %B) {
+; CHECK-LABEL: test_x86_avx512_ktestz_d:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vptestmw %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc0]
+; CHECK-NEXT:    vptestmw %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x26,0xc9]
+; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT:    ktestd %k1, %k0 # encoding: [0xc4,0xe1,0xf9,0x99,0xc1]
+; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %1 = icmp ne <32 x i16> %A, zeroinitializer
+  %2 = icmp ne <32 x i16> %B, zeroinitializer
+  %res = call i32 @llvm.x86.avx512.ktestz.d(<32 x i1> %1, <32 x i1> %2) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.avx512.ktestz.d(<32 x i1>, <32 x i1>) nounwind readnone
+
+define i32 @test_x86_avx512_ktestc_q(<64 x i8> %A, <64 x i8> %B) {
+; CHECK-LABEL: test_x86_avx512_ktestc_q:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
+; CHECK-NEXT:    vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
+; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT:    ktestq %k1, %k0 # encoding: [0xc4,0xe1,0xf8,0x99,0xc1]
+; CHECK-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %1 = icmp ne <64 x i8> %A, zeroinitializer
+  %2 = icmp ne <64 x i8> %B, zeroinitializer
+  %res = call i32 @llvm.x86.avx512.ktestc.q(<64 x i1> %1, <64 x i1> %2) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.avx512.ktestc.q(<64 x i1>, <64 x i1>) nounwind readnone
+
+define i32 @test_x86_avx512_ktestz_q(<64 x i8> %A, <64 x i8> %B) {
+; CHECK-LABEL: test_x86_avx512_ktestz_q:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
+; CHECK-NEXT:    vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
+; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT:    ktestq %k1, %k0 # encoding: [0xc4,0xe1,0xf8,0x99,0xc1]
+; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %1 = icmp ne <64 x i8> %A, zeroinitializer
+  %2 = icmp ne <64 x i8> %B, zeroinitializer
+  %res = call i32 @llvm.x86.avx512.ktestz.q(<64 x i1> %1, <64 x i1> %2) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.avx512.ktestz.q(<64 x i1>, <64 x i1>) nounwind readnone
+
 define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
 ; CHECK-LABEL: test_mask_packs_epi32_rr_512:
 ; CHECK:       # %bb.0:

Modified: llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll?rev=341259&r1=341258&r2=341259&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll Fri Aug 31 14:31:53 2018
@@ -48,6 +48,74 @@ entry:
 }
 declare <8 x i1> @llvm.x86.avx512.kadd.b(<8 x i1>, <8 x i1>)
 
+define i32 @test_x86_avx512_ktestc_w(<16 x i32> %A, <16 x i32> %B) {
+; CHECK-LABEL: test_x86_avx512_ktestc_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vptestmd %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
+; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x27,0xc9]
+; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT:    ktestw %k1, %k0 # encoding: [0xc5,0xf8,0x99,0xc1]
+; CHECK-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %1 = icmp ne <16 x i32> %A, zeroinitializer
+  %2 = icmp ne <16 x i32> %B, zeroinitializer
+  %res = call i32 @llvm.x86.avx512.ktestc.w(<16 x i1> %1, <16 x i1> %2) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.avx512.ktestc.w(<16 x i1>, <16 x i1>) nounwind readnone
+
+define i32 @test_x86_avx512_ktestz_w(<16 x i32> %A, <16 x i32> %B) {
+; CHECK-LABEL: test_x86_avx512_ktestz_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vptestmd %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
+; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x27,0xc9]
+; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT:    ktestw %k1, %k0 # encoding: [0xc5,0xf8,0x99,0xc1]
+; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %1 = icmp ne <16 x i32> %A, zeroinitializer
+  %2 = icmp ne <16 x i32> %B, zeroinitializer
+  %res = call i32 @llvm.x86.avx512.ktestz.w(<16 x i1> %1, <16 x i1> %2) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.avx512.ktestz.w(<16 x i1>, <16 x i1>) nounwind readnone
+
+define i32 @test_x86_avx512_ktestc_b(<8 x i64> %A, <8 x i64> %B) {
+; CHECK-LABEL: test_x86_avx512_ktestc_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0]
+; CHECK-NEXT:    vptestmq %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x27,0xc9]
+; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT:    ktestb %k1, %k0 # encoding: [0xc5,0xf9,0x99,0xc1]
+; CHECK-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %1 = icmp ne <8 x i64> %A, zeroinitializer
+  %2 = icmp ne <8 x i64> %B, zeroinitializer
+  %res = call i32 @llvm.x86.avx512.ktestc.b(<8 x i1> %1, <8 x i1> %2) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.avx512.ktestc.b(<8 x i1>, <8 x i1>) nounwind readnone
+
+define i32 @test_x86_avx512_ktestz_b(<8 x i64> %A, <8 x i64> %B) {
+; CHECK-LABEL: test_x86_avx512_ktestz_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0]
+; CHECK-NEXT:    vptestmq %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x27,0xc9]
+; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT:    ktestb %k1, %k0 # encoding: [0xc5,0xf9,0x99,0xc1]
+; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %1 = icmp ne <8 x i64> %A, zeroinitializer
+  %2 = icmp ne <8 x i64> %B, zeroinitializer
+  %res = call i32 @llvm.x86.avx512.ktestz.b(<8 x i1> %1, <8 x i1> %2) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.avx512.ktestz.b(<8 x i1>, <8 x i1>) nounwind readnone
+
 declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32)
 
 define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {




More information about the llvm-commits mailing list