[llvm] r324577 - [X86] Allow KORTEST instruction to be used for testing if a mask is all ones

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 7 23:54:16 PST 2018


Author: ctopper
Date: Wed Feb  7 23:54:16 2018
New Revision: 324577

URL: http://llvm.org/viewvc/llvm-project?rev=324577&view=rev
Log:
[X86] Allow KORTEST instruction to be used for testing if a mask is all ones

The KORTEST instruction sets the C flag if the result of ORing both operands together is all 1s. We can use this to lower (icmp eq/ne (bitcast (vXi1 X)), -1).

Differential Revision: https://reviews.llvm.org/D42772

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
    llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
    llvm/trunk/test/CodeGen/X86/setcc-lowering.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=324577&r1=324576&r2=324577&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Feb  7 23:54:16 2018
@@ -18135,18 +18135,22 @@ static SDValue EmitKTEST(SDValue Op0, SD
 
   Op0 = Op0.getOperand(0);
   MVT VT = Op0.getSimpleValueType();
-  if (!(Subtarget.hasDQI() && (VT == MVT::v8i1  || VT == MVT::v16i1)) &&
+  if (!(Subtarget.hasAVX512() && VT == MVT::v16i1) &&
+      !(Subtarget.hasDQI() && VT == MVT::v8i1) &&
       !(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)))
     return SDValue();
 
   X86::CondCode X86CC;
   if (isNullConstant(Op1)) {
     X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
+  } else if (isAllOnesConstant(Op1)) {
+    // C flag is set for all ones.
+    X86CC = CC == ISD::SETEQ ? X86::COND_B : X86::COND_AE;
   } else
     return SDValue();
 
-  SDValue KTEST = DAG.getNode(X86ISD::KTEST, dl, MVT::i32, Op0, Op0);
-  return getSETCC(X86CC, KTEST, dl, DAG);
+  SDValue KORTEST = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, Op0, Op0);
+  return getSETCC(X86CC, KORTEST, dl, DAG);
 }
 
 SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {

Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=324577&r1=324576&r2=324577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Wed Feb  7 23:54:16 2018
@@ -584,7 +584,7 @@ define void @test7(<8 x i1> %mask)  {
 ; SKX-NEXT:    movb $85, %al
 ; SKX-NEXT:    kmovd %eax, %k1
 ; SKX-NEXT:    korb %k1, %k0, %k0
-; SKX-NEXT:    ktestb %k0, %k0
+; SKX-NEXT:    kortestb %k0, %k0
 ; SKX-NEXT:    retq
 ;
 ; AVX512BW-LABEL: test7:
@@ -607,7 +607,7 @@ define void @test7(<8 x i1> %mask)  {
 ; AVX512DQ-NEXT:    movb $85, %al
 ; AVX512DQ-NEXT:    kmovw %eax, %k1
 ; AVX512DQ-NEXT:    korb %k1, %k0, %k0
-; AVX512DQ-NEXT:    ktestb %k0, %k0
+; AVX512DQ-NEXT:    kortestb %k0, %k0
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 allocas:
@@ -1673,7 +1673,7 @@ define void @ktest_1(<8 x double> %in, d
 ; SKX-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
 ; SKX-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
 ; SKX-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
-; SKX-NEXT:    ktestb %k0, %k0
+; SKX-NEXT:    kortestb %k0, %k0
 ; SKX-NEXT:    je LBB42_2
 ; SKX-NEXT:  ## %bb.1: ## %L1
 ; SKX-NEXT:    vmovapd %zmm0, (%rdi)
@@ -1708,7 +1708,7 @@ define void @ktest_1(<8 x double> %in, d
 ; AVX512DQ-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
 ; AVX512DQ-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
 ; AVX512DQ-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
-; AVX512DQ-NEXT:    ktestb %k0, %k0
+; AVX512DQ-NEXT:    kortestb %k0, %k0
 ; AVX512DQ-NEXT:    je LBB42_2
 ; AVX512DQ-NEXT:  ## %bb.1: ## %L1
 ; AVX512DQ-NEXT:    vmovapd %zmm0, (%rdi)
@@ -1788,7 +1788,7 @@ define void @ktest_2(<32 x float> %in, f
 ; SKX-NEXT:    vcmpltps %zmm2, %zmm1, %k2
 ; SKX-NEXT:    kunpckwd %k1, %k2, %k1
 ; SKX-NEXT:    kord %k1, %k0, %k0
-; SKX-NEXT:    ktestd %k0, %k0
+; SKX-NEXT:    kortestd %k0, %k0
 ; SKX-NEXT:    je LBB43_2
 ; SKX-NEXT:  ## %bb.1: ## %L1
 ; SKX-NEXT:    vmovaps %zmm0, (%rdi)
@@ -1814,7 +1814,7 @@ define void @ktest_2(<32 x float> %in, f
 ; AVX512BW-NEXT:    vcmpltps %zmm2, %zmm1, %k2
 ; AVX512BW-NEXT:    kunpckwd %k1, %k2, %k1
 ; AVX512BW-NEXT:    kord %k1, %k0, %k0
-; AVX512BW-NEXT:    ktestd %k0, %k0
+; AVX512BW-NEXT:    kortestd %k0, %k0
 ; AVX512BW-NEXT:    je LBB43_2
 ; AVX512BW-NEXT:  ## %bb.1: ## %L1
 ; AVX512BW-NEXT:    vmovaps %zmm0, (%rdi)
@@ -2786,3 +2786,32 @@ bb.2:
 }
 declare void @foo()
 
+; Make sure we can use the C flag from kortest to check for all ones.
+define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
+; CHECK-LABEL: ktest_allones:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vporq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k0
+; CHECK-NEXT:    kortestw %k0, %k0
+; CHECK-NEXT:    jb LBB65_2
+; CHECK-NEXT:  ## %bb.1: ## %bb.1
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    callq _foo
+; CHECK-NEXT:  LBB65_2: ## %bb.2
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %a = icmp eq <16 x i32> %x, zeroinitializer
+  %b = icmp eq <16 x i32> %y, zeroinitializer
+  %c = and <16 x i1> %a, %b
+  %d = bitcast <16 x i1> %c to i16
+  %e = icmp eq i16 %d, -1
+  br i1 %e, label %bb.2, label %bb.1
+bb.1:
+  call void @foo()
+  br label %bb.2
+bb.2:
+  ret void
+}

Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=324577&r1=324576&r2=324577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Wed Feb  7 23:54:16 2018
@@ -7031,7 +7031,7 @@ define void @vcmp_test7(<8 x i1> %mask)
 ; GENERIC-NEXT:    movb $85, %al # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %eax, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    korb %k1, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT:    ktestb %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kortestb %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vcmp_test7:
@@ -7041,7 +7041,7 @@ define void @vcmp_test7(<8 x i1> %mask)
 ; SKX-NEXT:    movb $85, %al # sched: [1:0.25]
 ; SKX-NEXT:    kmovd %eax, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    korb %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT:    ktestb %k0, %k0 # sched: [3:1.00]
+; SKX-NEXT:    kortestb %k0, %k0 # sched: [3:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 allocas:
   %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
@@ -7615,7 +7615,7 @@ define void @ktest_1(<8 x double> %in, d
 ; GENERIC-NEXT:    vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [4:0.50]
 ; GENERIC-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00]
-; GENERIC-NEXT:    ktestb %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kortestb %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    je .LBB410_2 # sched: [1:1.00]
 ; GENERIC-NEXT:  # %bb.1: # %L1
 ; GENERIC-NEXT:    vmovapd %zmm0, (%rdi) # sched: [1:1.00]
@@ -7632,7 +7632,7 @@ define void @ktest_1(<8 x double> %in, d
 ; SKX-NEXT:    vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00]
 ; SKX-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [8:0.50]
 ; SKX-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00]
-; SKX-NEXT:    ktestb %k0, %k0 # sched: [3:1.00]
+; SKX-NEXT:    kortestb %k0, %k0 # sched: [3:1.00]
 ; SKX-NEXT:    je .LBB410_2 # sched: [1:0.50]
 ; SKX-NEXT:  # %bb.1: # %L1
 ; SKX-NEXT:    vmovapd %zmm0, (%rdi) # sched: [1:1.00]
@@ -7684,7 +7684,7 @@ define void @ktest_2(<32 x float> %in, f
 ; GENERIC-NEXT:    vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
 ; GENERIC-NEXT:    kunpckwd %k1, %k2, %k1 # sched: [1:1.00]
 ; GENERIC-NEXT:    kord %k1, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT:    ktestd %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kortestd %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    je .LBB411_2 # sched: [1:1.00]
 ; GENERIC-NEXT:  # %bb.1: # %L1
 ; GENERIC-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
@@ -7710,7 +7710,7 @@ define void @ktest_2(<32 x float> %in, f
 ; SKX-NEXT:    vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
 ; SKX-NEXT:    kunpckwd %k1, %k2, %k1 # sched: [3:1.00]
 ; SKX-NEXT:    kord %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT:    ktestd %k0, %k0 # sched: [3:1.00]
+; SKX-NEXT:    kortestd %k0, %k0 # sched: [3:1.00]
 ; SKX-NEXT:    je .LBB411_2 # sched: [1:0.50]
 ; SKX-NEXT:  # %bb.1: # %L1
 ; SKX-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]

Modified: llvm/trunk/test/CodeGen/X86/setcc-lowering.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/setcc-lowering.ll?rev=324577&r1=324576&r2=324577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/setcc-lowering.ll (original)
+++ llvm/trunk/test/CodeGen/X86/setcc-lowering.ll Wed Feb  7 23:54:16 2018
@@ -84,8 +84,7 @@ define void @pr26232(i64 %a, <16 x i1> %
 ; KNL-32-NEXT:    cmovlw %dx, %si
 ; KNL-32-NEXT:    kmovw %esi, %k1
 ; KNL-32-NEXT:    kandw %k0, %k1, %k1
-; KNL-32-NEXT:    kmovw %k1, %esi
-; KNL-32-NEXT:    testw %si, %si
+; KNL-32-NEXT:    kortestw %k1, %k1
 ; KNL-32-NEXT:    jne .LBB1_1
 ; KNL-32-NEXT:  # %bb.2: # %for_exit600
 ; KNL-32-NEXT:    popl %esi




More information about the llvm-commits mailing list