[llvm] r350481 - [X86] Use two pmovmskbs in combineBitcastvxi1 for (i64 (bitcast (v64i1 (truncate (v64i8))))) on KNL.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 5 14:42:58 PST 2019
Author: ctopper
Date: Sat Jan 5 14:42:58 2019
New Revision: 350481
URL: http://llvm.org/viewvc/llvm-project?rev=350481&view=rev
Log:
[X86] Use two pmovmskbs in combineBitcastvxi1 for (i64 (bitcast (v64i1 (truncate (v64i8))))) on KNL.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll
llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=350481&r1=350480&r2=350481&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Jan 5 14:42:58 2019
@@ -32743,7 +32743,8 @@ static SDValue combineBitcastvxi1(Select
// vpcmpeqb/vpcmpgtb.
bool IsTruncated = N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
(N0.getOperand(0).getValueType() == MVT::v16i8 ||
- N0.getOperand(0).getValueType() == MVT::v32i8);
+ N0.getOperand(0).getValueType() == MVT::v32i8 ||
+ N0.getOperand(0).getValueType() == MVT::v64i8);
// With AVX512 vxi1 types are legal and we prefer using k-regs.
// MOVMSK is supported in SSE2 or later.
@@ -32799,12 +32800,30 @@ static SDValue combineBitcastvxi1(Select
case MVT::v32i1:
SExtVT = MVT::v32i8;
break;
+ case MVT::v64i1:
+ // If we have AVX512F but not AVX512BW, and the input is truncated from
+ // v64i8 (checked earlier), then split the input and make two pmovmskbs.
+ if (Subtarget.hasAVX512() && !Subtarget.hasBWI()) {
+ SExtVT = MVT::v64i8;
+ break;
+ }
+ return SDValue();
};
SDLoc DL(BitCast);
SDValue V = DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, N0);
- if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8) {
+ if (SExtVT == MVT::v64i8) {
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(V, DL);
+ Lo = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Lo);
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Lo);
+ Hi = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Hi);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Hi);
+ Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
+ DAG.getConstant(32, DL, MVT::i8));
+ V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
+ } else if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8) {
V = getPMOVMSKB(DL, V, DAG, Subtarget);
} else {
if (SExtVT == MVT::v8i16)
Modified: llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll?rev=350481&r1=350480&r2=350481&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll Sat Jan 5 14:42:58 2019
@@ -256,26 +256,10 @@ define i64 @v64i8(<64 x i8> %a, <64 x i8
;
; AVX512F-LABEL: v64i8:
; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm2
-; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, %ecx
-; AVX512F-NEXT: shll $16, %ecx
-; AVX512F-NEXT: orl %eax, %ecx
-; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm0
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kmovw %k0, %edx
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: shll $16, %eax
-; AVX512F-NEXT: orl %edx, %eax
+; AVX512F-NEXT: vpmovmskb %ymm0, %ecx
+; AVX512F-NEXT: vpmovmskb %ymm1, %eax
; AVX512F-NEXT: shlq $32, %rax
; AVX512F-NEXT: orq %rcx, %rax
; AVX512F-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll?rev=350481&r1=350480&r2=350481&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll Sat Jan 5 14:42:58 2019
@@ -235,30 +235,11 @@ define i1 @allones_v64i8_sign(<64 x i8>
;
; KNL-LABEL: allones_v64i8_sign:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm3
-; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: vpcmpgtb %ymm1, %ymm2, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: shll $16, %edx
-; KNL-NEXT: orl %eax, %edx
-; KNL-NEXT: shlq $32, %rdx
-; KNL-NEXT: orq %rcx, %rdx
-; KNL-NEXT: cmpq $-1, %rdx
+; KNL-NEXT: vpmovmskb %ymm1, %eax
+; KNL-NEXT: shlq $32, %rax
+; KNL-NEXT: vpmovmskb %ymm0, %ecx
+; KNL-NEXT: orq %rax, %rcx
+; KNL-NEXT: cmpq $-1, %rcx
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -327,29 +308,10 @@ define i1 @allzeros_v64i8_sign(<64 x i8>
;
; KNL-LABEL: allzeros_v64i8_sign:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm3
-; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: vpcmpgtb %ymm1, %ymm2, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: shll $16, %edx
-; KNL-NEXT: orl %eax, %edx
-; KNL-NEXT: shlq $32, %rdx
-; KNL-NEXT: orq %rcx, %rdx
+; KNL-NEXT: vpmovmskb %ymm1, %eax
+; KNL-NEXT: shlq $32, %rax
+; KNL-NEXT: vpmovmskb %ymm0, %ecx
+; KNL-NEXT: orq %rax, %rcx
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -1662,32 +1624,13 @@ define i1 @allones_v64i8_and1(<64 x i8>
;
; KNL-LABEL: allones_v64i8_and1:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
-; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm3
-; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: shll $16, %edx
-; KNL-NEXT: orl %eax, %edx
-; KNL-NEXT: shlq $32, %rdx
-; KNL-NEXT: orq %rcx, %rdx
-; KNL-NEXT: cmpq $-1, %rdx
+; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT: vpsllw $7, %ymm1, %ymm1
+; KNL-NEXT: vpmovmskb %ymm1, %eax
+; KNL-NEXT: shlq $32, %rax
+; KNL-NEXT: vpmovmskb %ymm0, %ecx
+; KNL-NEXT: orq %rax, %rcx
+; KNL-NEXT: cmpq $-1, %rcx
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -1771,31 +1714,12 @@ define i1 @allzeros_v64i8_and1(<64 x i8>
;
; KNL-LABEL: allzeros_v64i8_and1:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
-; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm3
-; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: shll $16, %edx
-; KNL-NEXT: orl %eax, %edx
-; KNL-NEXT: shlq $32, %rdx
-; KNL-NEXT: orq %rcx, %rdx
+; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT: vpsllw $7, %ymm1, %ymm1
+; KNL-NEXT: vpmovmskb %ymm1, %eax
+; KNL-NEXT: shlq $32, %rax
+; KNL-NEXT: vpmovmskb %ymm0, %ecx
+; KNL-NEXT: orq %rax, %rcx
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -3299,32 +3223,13 @@ define i1 @allones_v64i8_and4(<64 x i8>
;
; KNL-LABEL: allones_v64i8_and4:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
-; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm3
-; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: shll $16, %edx
-; KNL-NEXT: orl %eax, %edx
-; KNL-NEXT: shlq $32, %rdx
-; KNL-NEXT: orq %rcx, %rdx
-; KNL-NEXT: cmpq $-1, %rdx
+; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
+; KNL-NEXT: vpsllw $5, %ymm1, %ymm1
+; KNL-NEXT: vpmovmskb %ymm1, %eax
+; KNL-NEXT: shlq $32, %rax
+; KNL-NEXT: vpmovmskb %ymm0, %ecx
+; KNL-NEXT: orq %rax, %rcx
+; KNL-NEXT: cmpq $-1, %rcx
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -3408,31 +3313,12 @@ define i1 @allzeros_v64i8_and4(<64 x i8>
;
; KNL-LABEL: allzeros_v64i8_and4:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
-; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm3
-; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: shll $16, %edx
-; KNL-NEXT: orl %eax, %edx
-; KNL-NEXT: shlq $32, %rdx
-; KNL-NEXT: orq %rcx, %rdx
+; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
+; KNL-NEXT: vpsllw $5, %ymm1, %ymm1
+; KNL-NEXT: vpmovmskb %ymm1, %eax
+; KNL-NEXT: shlq $32, %rax
+; KNL-NEXT: vpmovmskb %ymm0, %ecx
+; KNL-NEXT: orq %rax, %rcx
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
More information about the llvm-commits mailing list