[llvm] r324577 - [X86] Allow KORTEST instruction to be used for testing if a mask is all ones
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 7 23:54:16 PST 2018
Author: ctopper
Date: Wed Feb 7 23:54:16 2018
New Revision: 324577
URL: http://llvm.org/viewvc/llvm-project?rev=324577&view=rev
Log:
[X86] Allow KORTEST instruction to be used for testing if a mask is all ones
The KORTEST instruction sets the C flag if the result of ORing both operands together is all 1s. We can use this to lower (icmp eq/ne (bitcast (vXi1 X)), -1).
Differential Revision: https://reviews.llvm.org/D42772
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
llvm/trunk/test/CodeGen/X86/setcc-lowering.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=324577&r1=324576&r2=324577&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Feb 7 23:54:16 2018
@@ -18135,18 +18135,22 @@ static SDValue EmitKTEST(SDValue Op0, SD
Op0 = Op0.getOperand(0);
MVT VT = Op0.getSimpleValueType();
- if (!(Subtarget.hasDQI() && (VT == MVT::v8i1 || VT == MVT::v16i1)) &&
+ if (!(Subtarget.hasAVX512() && VT == MVT::v16i1) &&
+ !(Subtarget.hasDQI() && VT == MVT::v8i1) &&
!(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)))
return SDValue();
X86::CondCode X86CC;
if (isNullConstant(Op1)) {
X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
+ } else if (isAllOnesConstant(Op1)) {
+ // C flag is set for all ones.
+ X86CC = CC == ISD::SETEQ ? X86::COND_B : X86::COND_AE;
} else
return SDValue();
- SDValue KTEST = DAG.getNode(X86ISD::KTEST, dl, MVT::i32, Op0, Op0);
- return getSETCC(X86CC, KTEST, dl, DAG);
+ SDValue KORTEST = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, Op0, Op0);
+ return getSETCC(X86CC, KORTEST, dl, DAG);
}
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=324577&r1=324576&r2=324577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Wed Feb 7 23:54:16 2018
@@ -584,7 +584,7 @@ define void @test7(<8 x i1> %mask) {
; SKX-NEXT: movb $85, %al
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: korb %k1, %k0, %k0
-; SKX-NEXT: ktestb %k0, %k0
+; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test7:
@@ -607,7 +607,7 @@ define void @test7(<8 x i1> %mask) {
; AVX512DQ-NEXT: movb $85, %al
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: korb %k1, %k0, %k0
-; AVX512DQ-NEXT: ktestb %k0, %k0
+; AVX512DQ-NEXT: kortestb %k0, %k0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
allocas:
@@ -1673,7 +1673,7 @@ define void @ktest_1(<8 x double> %in, d
; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
-; SKX-NEXT: ktestb %k0, %k0
+; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: je LBB42_2
; SKX-NEXT: ## %bb.1: ## %L1
; SKX-NEXT: vmovapd %zmm0, (%rdi)
@@ -1708,7 +1708,7 @@ define void @ktest_1(<8 x double> %in, d
; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; AVX512DQ-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
-; AVX512DQ-NEXT: ktestb %k0, %k0
+; AVX512DQ-NEXT: kortestb %k0, %k0
; AVX512DQ-NEXT: je LBB42_2
; AVX512DQ-NEXT: ## %bb.1: ## %L1
; AVX512DQ-NEXT: vmovapd %zmm0, (%rdi)
@@ -1788,7 +1788,7 @@ define void @ktest_2(<32 x float> %in, f
; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2
; SKX-NEXT: kunpckwd %k1, %k2, %k1
; SKX-NEXT: kord %k1, %k0, %k0
-; SKX-NEXT: ktestd %k0, %k0
+; SKX-NEXT: kortestd %k0, %k0
; SKX-NEXT: je LBB43_2
; SKX-NEXT: ## %bb.1: ## %L1
; SKX-NEXT: vmovaps %zmm0, (%rdi)
@@ -1814,7 +1814,7 @@ define void @ktest_2(<32 x float> %in, f
; AVX512BW-NEXT: vcmpltps %zmm2, %zmm1, %k2
; AVX512BW-NEXT: kunpckwd %k1, %k2, %k1
; AVX512BW-NEXT: kord %k1, %k0, %k0
-; AVX512BW-NEXT: ktestd %k0, %k0
+; AVX512BW-NEXT: kortestd %k0, %k0
; AVX512BW-NEXT: je LBB43_2
; AVX512BW-NEXT: ## %bb.1: ## %L1
; AVX512BW-NEXT: vmovaps %zmm0, (%rdi)
@@ -2786,3 +2786,32 @@ bb.2:
}
declare void @foo()
+; Make sure we can use the C flag from kortest to check for all ones.
+define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
+; CHECK-LABEL: ktest_allones:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
+; CHECK-NEXT: kortestw %k0, %k0
+; CHECK-NEXT: jb LBB65_2
+; CHECK-NEXT: ## %bb.1: ## %bb.1
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq _foo
+; CHECK-NEXT: LBB65_2: ## %bb.2
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %a = icmp eq <16 x i32> %x, zeroinitializer
+ %b = icmp eq <16 x i32> %y, zeroinitializer
+ %c = and <16 x i1> %a, %b
+ %d = bitcast <16 x i1> %c to i16
+ %e = icmp eq i16 %d, -1
+ br i1 %e, label %bb.2, label %bb.1
+bb.1:
+ call void @foo()
+ br label %bb.2
+bb.2:
+ ret void
+}
Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=324577&r1=324576&r2=324577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Wed Feb 7 23:54:16 2018
@@ -7031,7 +7031,7 @@ define void @vcmp_test7(<8 x i1> %mask)
; GENERIC-NEXT: movb $85, %al # sched: [1:0.33]
; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
; GENERIC-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: ktestb %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kortestb %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vcmp_test7:
@@ -7041,7 +7041,7 @@ define void @vcmp_test7(<8 x i1> %mask)
; SKX-NEXT: movb $85, %al # sched: [1:0.25]
; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
; SKX-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: ktestb %k0, %k0 # sched: [3:1.00]
+; SKX-NEXT: kortestb %k0, %k0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
allocas:
%a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
@@ -7615,7 +7615,7 @@ define void @ktest_1(<8 x double> %in, d
; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [4:0.50]
; GENERIC-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00]
-; GENERIC-NEXT: ktestb %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kortestb %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: je .LBB410_2 # sched: [1:1.00]
; GENERIC-NEXT: # %bb.1: # %L1
; GENERIC-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00]
@@ -7632,7 +7632,7 @@ define void @ktest_1(<8 x double> %in, d
; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [8:0.50]
; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00]
-; SKX-NEXT: ktestb %k0, %k0 # sched: [3:1.00]
+; SKX-NEXT: kortestb %k0, %k0 # sched: [3:1.00]
; SKX-NEXT: je .LBB410_2 # sched: [1:0.50]
; SKX-NEXT: # %bb.1: # %L1
; SKX-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00]
@@ -7684,7 +7684,7 @@ define void @ktest_2(<32 x float> %in, f
; GENERIC-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
; GENERIC-NEXT: kunpckwd %k1, %k2, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kord %k1, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: ktestd %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kortestd %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: je .LBB411_2 # sched: [1:1.00]
; GENERIC-NEXT: # %bb.1: # %L1
; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
@@ -7710,7 +7710,7 @@ define void @ktest_2(<32 x float> %in, f
; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
; SKX-NEXT: kunpckwd %k1, %k2, %k1 # sched: [3:1.00]
; SKX-NEXT: kord %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: ktestd %k0, %k0 # sched: [3:1.00]
+; SKX-NEXT: kortestd %k0, %k0 # sched: [3:1.00]
; SKX-NEXT: je .LBB411_2 # sched: [1:0.50]
; SKX-NEXT: # %bb.1: # %L1
; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
Modified: llvm/trunk/test/CodeGen/X86/setcc-lowering.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/setcc-lowering.ll?rev=324577&r1=324576&r2=324577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/setcc-lowering.ll (original)
+++ llvm/trunk/test/CodeGen/X86/setcc-lowering.ll Wed Feb 7 23:54:16 2018
@@ -84,8 +84,7 @@ define void @pr26232(i64 %a, <16 x i1> %
; KNL-32-NEXT: cmovlw %dx, %si
; KNL-32-NEXT: kmovw %esi, %k1
; KNL-32-NEXT: kandw %k0, %k1, %k1
-; KNL-32-NEXT: kmovw %k1, %esi
-; KNL-32-NEXT: testw %si, %si
+; KNL-32-NEXT: kortestw %k1, %k1
; KNL-32-NEXT: jne .LBB1_1
; KNL-32-NEXT: # %bb.2: # %for_exit600
; KNL-32-NEXT: popl %esi
More information about the llvm-commits
mailing list