[llvm] r350480 - [X86] Allow combineBitcastvxi1 to use pmovmskb on avx512 targets if the input is a truncate from v16i8/v32i8.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 5 13:40:07 PST 2019
Author: ctopper
Date: Sat Jan 5 13:40:07 2019
New Revision: 350480
URL: http://llvm.org/viewvc/llvm-project?rev=350480&view=rev
Log:
[X86] Allow combineBitcastvxi1 to use pmovmskb on avx512 targets if the input is a truncate from v16i8/v32i8.
This is especially helpful on targets without avx512bw, since we don't have a good way to convert from v16i8/v32i8 to v16i1/v32i1 for the truncate anyway. If we're just going to convert it to a GPR, we might as well use pmovmskb to accomplish both the truncate and the move.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-ext.ll
llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-128.ll
llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-256.ll
llvm/trunk/test/CodeGen/X86/bitcast-setcc-128.ll
llvm/trunk/test/CodeGen/X86/bitcast-setcc-256.ll
llvm/trunk/test/CodeGen/X86/broadcastm-lowering.ll
llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=350480&r1=350479&r2=350480&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Jan 5 13:40:07 2019
@@ -32737,9 +32737,17 @@ static SDValue combineBitcastvxi1(Select
if (!VT.isScalarInteger() || !VecVT.isSimple())
return SDValue();
+ // If the input is a truncate from v16i8 or v32i8 go ahead and use a
+ // movmskb even with avx512. This will be better than truncating to vXi1 and
+ // using a kmov. This can especially help KNL if the input is a v16i8/v32i8
+ // vpcmpeqb/vpcmpgtb.
+ bool IsTruncated = N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ (N0.getOperand(0).getValueType() == MVT::v16i8 ||
+ N0.getOperand(0).getValueType() == MVT::v32i8);
+
// With AVX512 vxi1 types are legal and we prefer using k-regs.
// MOVMSK is supported in SSE2 or later.
- if (Subtarget.hasAVX512() || !Subtarget.hasSSE2())
+ if (!Subtarget.hasSSE2() || (Subtarget.hasAVX512() && !IsTruncated))
return SDValue();
// There are MOVMSK flavors for types v16i8, v32i8, v4f32, v8f32, v4f64 and
Modified: llvm/trunk/test/CodeGen/X86/avx512-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-ext.ll?rev=350480&r1=350479&r2=350480&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-ext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-ext.ll Sat Jan 5 13:40:07 2019
@@ -1644,33 +1644,12 @@ define <8 x i64> @zext_8i1_to_8xi64(i8
}
define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
-; KNL-LABEL: trunc_16i8_to_16i1:
-; KNL: # %bb.0:
-; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; KNL-NEXT: vpslld $31, %zmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: # kill: def $ax killed $ax killed $eax
-; KNL-NEXT: vzeroupper
-; KNL-NEXT: retq
-;
-; SKX-LABEL: trunc_16i8_to_16i1:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
-; SKX-NEXT: vpmovb2m %xmm0, %k0
-; SKX-NEXT: kmovd %k0, %eax
-; SKX-NEXT: # kill: def $ax killed $ax killed $eax
-; SKX-NEXT: retq
-;
-; AVX512DQNOBW-LABEL: trunc_16i8_to_16i1:
-; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k0
-; AVX512DQNOBW-NEXT: kmovw %k0, %eax
-; AVX512DQNOBW-NEXT: # kill: def $ax killed $ax killed $eax
-; AVX512DQNOBW-NEXT: vzeroupper
-; AVX512DQNOBW-NEXT: retq
+; ALL-LABEL: trunc_16i8_to_16i1:
+; ALL: # %bb.0:
+; ALL-NEXT: vpsllw $7, %xmm0, %xmm0
+; ALL-NEXT: vpmovmskb %xmm0, %eax
+; ALL-NEXT: # kill: def $ax killed $ax killed $eax
+; ALL-NEXT: retq
%mask_b = trunc <16 x i8>%a to <16 x i1>
%mask = bitcast <16 x i1> %mask_b to i16
ret i16 %mask
Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=350480&r1=350479&r2=350480&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Sat Jan 5 13:40:07 2019
@@ -4285,16 +4285,14 @@ define i16 @trunc_16i8_to_16i1(<16 x i8>
; GENERIC-LABEL: trunc_16i8_to_16i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
+; GENERIC-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00]
; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: trunc_16i8_to_16i1:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
+; SKX-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00]
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: retq # sched: [7:1.00]
%mask_b = trunc <16 x i8>%a to <16 x i1>
Modified: llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-128.ll?rev=350480&r1=350479&r2=350480&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-128.ll Sat Jan 5 13:40:07 2019
@@ -159,11 +159,8 @@ define i16 @v16i8(<16 x i8> %a, <16 x i8
; AVX512F-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm1
; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: vpmovmskb %xmm0, %eax
; AVX512F-NEXT: # kill: def $ax killed $ax killed $eax
-; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v16i8:
Modified: llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-256.ll?rev=350480&r1=350479&r2=350480&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-256.ll Sat Jan 5 13:40:07 2019
@@ -399,15 +399,7 @@ define i32 @v32i8(<32 x i8> %a, <32 x i8
; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm1
; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kmovw %k0, %ecx
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: shll $16, %eax
-; AVX512F-NEXT: orl %ecx, %eax
+; AVX512F-NEXT: vpmovmskb %ymm0, %eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/bitcast-setcc-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-setcc-128.ll?rev=350480&r1=350479&r2=350480&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-setcc-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-setcc-128.ll Sat Jan 5 13:40:07 2019
@@ -128,11 +128,8 @@ define i16 @v16i8(<16 x i8> %a, <16 x i8
; AVX512F-LABEL: v16i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: vpmovmskb %xmm0, %eax
; AVX512F-NEXT: # kill: def $ax killed $ax killed $eax
-; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v16i8:
Modified: llvm/trunk/test/CodeGen/X86/bitcast-setcc-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-setcc-256.ll?rev=350480&r1=350479&r2=350480&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-setcc-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-setcc-256.ll Sat Jan 5 13:40:07 2019
@@ -184,15 +184,7 @@ define i32 @v32i8(<32 x i8> %a, <32 x i8
; AVX512F-LABEL: v32i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kmovw %k0, %ecx
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: shll $16, %eax
-; AVX512F-NEXT: orl %ecx, %eax
+; AVX512F-NEXT: vpmovmskb %ymm0, %eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/broadcastm-lowering.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/broadcastm-lowering.ll?rev=350480&r1=350479&r2=350480&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/broadcastm-lowering.ll (original)
+++ llvm/trunk/test/CodeGen/X86/broadcastm-lowering.ll Sat Jan 5 13:40:07 2019
@@ -43,15 +43,9 @@ define <4 x i32> @test_mm_epi32(<16 x i8
; AVX512CD-LABEL: test_mm_epi32:
; AVX512CD: # %bb.0: # %entry
; AVX512CD-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512CD-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512CD-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512CD-NEXT: kmovw %k0, %eax
-; AVX512CD-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; AVX512CD-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
-; AVX512CD-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
-; AVX512CD-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; AVX512CD-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; AVX512CD-NEXT: vzeroupper
+; AVX512CD-NEXT: vpmovmskb %xmm0, %eax
+; AVX512CD-NEXT: vmovd %eax, %xmm0
+; AVX512CD-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX512CD-NEXT: retq
;
; AVX512VLCDBW-LABEL: test_mm_epi32:
Modified: llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll?rev=350480&r1=350479&r2=350480&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll Sat Jan 5 13:40:07 2019
@@ -22,13 +22,9 @@ define i1 @allones_v16i8_sign(<16 x i8>
;
; KNL-LABEL: allones_v16i8_sign:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kortestw %k0, %k0
-; KNL-NEXT: setb %al
-; KNL-NEXT: vzeroupper
+; KNL-NEXT: vpmovmskb %xmm0, %eax
+; KNL-NEXT: cmpw $-1, %ax
+; KNL-NEXT: sete %al
; KNL-NEXT: retq
;
; SKX-LABEL: allones_v16i8_sign:
@@ -60,13 +56,9 @@ define i1 @allzeros_v16i8_sign(<16 x i8>
;
; KNL-LABEL: allzeros_v16i8_sign:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kortestw %k0, %k0
+; KNL-NEXT: vpmovmskb %xmm0, %eax
+; KNL-NEXT: testw %ax, %ax
; KNL-NEXT: sete %al
-; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: allzeros_v16i8_sign:
@@ -117,18 +109,8 @@ define i1 @allones_v32i8_sign(<32 x i8>
;
; KNL-LABEL: allones_v32i8_sign:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: cmpl $-1, %ecx
+; KNL-NEXT: vpmovmskb %ymm0, %eax
+; KNL-NEXT: cmpl $-1, %eax
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -180,17 +162,8 @@ define i1 @allzeros_v32i8_sign(<32 x i8>
;
; KNL-LABEL: allzeros_v32i8_sign:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
+; KNL-NEXT: vpmovmskb %ymm0, %eax
+; KNL-NEXT: testl %eax, %eax
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -1438,14 +1411,10 @@ define i1 @allones_v16i8_and1(<16 x i8>
;
; KNL-LABEL: allones_v16i8_and1:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kortestw %k0, %k0
-; KNL-NEXT: setb %al
-; KNL-NEXT: vzeroupper
+; KNL-NEXT: vpsllw $7, %xmm0, %xmm0
+; KNL-NEXT: vpmovmskb %xmm0, %eax
+; KNL-NEXT: cmpw $-1, %ax
+; KNL-NEXT: sete %al
; KNL-NEXT: retq
;
; SKX-LABEL: allones_v16i8_and1:
@@ -1480,14 +1449,10 @@ define i1 @allzeros_v16i8_and1(<16 x i8>
;
; KNL-LABEL: allzeros_v16i8_and1:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kortestw %k0, %k0
+; KNL-NEXT: vpsllw $7, %xmm0, %xmm0
+; KNL-NEXT: vpmovmskb %xmm0, %eax
+; KNL-NEXT: testw %ax, %ax
; KNL-NEXT: sete %al
-; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: allzeros_v16i8_and1:
@@ -1546,19 +1511,9 @@ define i1 @allones_v32i8_and1(<32 x i8>
;
; KNL-LABEL: allones_v32i8_and1:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: cmpl $-1, %ecx
+; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT: vpmovmskb %ymm0, %eax
+; KNL-NEXT: cmpl $-1, %eax
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -1618,18 +1573,9 @@ define i1 @allzeros_v32i8_and1(<32 x i8>
;
; KNL-LABEL: allzeros_v32i8_and1:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
+; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT: vpmovmskb %ymm0, %eax
+; KNL-NEXT: testl %eax, %eax
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -3102,14 +3048,10 @@ define i1 @allones_v16i8_and4(<16 x i8>
;
; KNL-LABEL: allones_v16i8_and4:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kortestw %k0, %k0
-; KNL-NEXT: setb %al
-; KNL-NEXT: vzeroupper
+; KNL-NEXT: vpsllw $5, %xmm0, %xmm0
+; KNL-NEXT: vpmovmskb %xmm0, %eax
+; KNL-NEXT: cmpw $-1, %ax
+; KNL-NEXT: sete %al
; KNL-NEXT: retq
;
; SKX-LABEL: allones_v16i8_and4:
@@ -3144,14 +3086,10 @@ define i1 @allzeros_v16i8_and4(<16 x i8>
;
; KNL-LABEL: allzeros_v16i8_and4:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kortestw %k0, %k0
+; KNL-NEXT: vpsllw $5, %xmm0, %xmm0
+; KNL-NEXT: vpmovmskb %xmm0, %eax
+; KNL-NEXT: testw %ax, %ax
; KNL-NEXT: sete %al
-; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: allzeros_v16i8_and4:
@@ -3210,19 +3148,9 @@ define i1 @allones_v32i8_and4(<32 x i8>
;
; KNL-LABEL: allones_v32i8_and4:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: cmpl $-1, %ecx
+; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
+; KNL-NEXT: vpmovmskb %ymm0, %eax
+; KNL-NEXT: cmpl $-1, %eax
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -3282,18 +3210,9 @@ define i1 @allzeros_v32i8_and4(<32 x i8>
;
; KNL-LABEL: allzeros_v32i8_and4:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
+; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
+; KNL-NEXT: vpmovmskb %ymm0, %eax
+; KNL-NEXT: testl %eax, %eax
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -4926,12 +4845,7 @@ define i32 @movmskb(<16 x i8> %x) {
;
; KNL-LABEL: movmskb:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vzeroupper
+; KNL-NEXT: vpmovmskb %xmm0, %eax
; KNL-NEXT: retq
;
; SKX-LABEL: movmskb:
@@ -4975,17 +4889,7 @@ define i32 @movmskb256(<32 x i8> %x) {
;
; KNL-LABEL: movmskb256:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: shll $16, %eax
-; KNL-NEXT: orl %ecx, %eax
+; KNL-NEXT: vpmovmskb %ymm0, %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
More information about the llvm-commits
mailing list