[llvm] 846d0ac - [X86] Don't disable code in combineHorizontalPredicateResult just because we have avx512
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 11 14:38:26 PST 2020
Author: Craig Topper
Date: 2020-02-11T14:36:29-08:00
New Revision: 846d0ac43eb95c7f2f7e98b076b42c8c3208f506
URL: https://github.com/llvm/llvm-project/commit/846d0ac43eb95c7f2f7e98b076b42c8c3208f506
DIFF: https://github.com/llvm/llvm-project/commit/846d0ac43eb95c7f2f7e98b076b42c8c3208f506.diff
LOG: [X86] Don't disable code in combineHorizontalPredicateResult just because we have avx512
We aren't doing a good job of optimizing AVX512 outside of this code. So remove the bail out for AVX512 and replace with a FIXME. This at least gets us the AVX2 codegen.
Differential Revision: https://reviews.llvm.org/D74431
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-compare-all_of.ll
llvm/test/CodeGen/X86/vector-compare-any_of.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f5a101196d09..4321bb79d857 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37359,10 +37359,7 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract,
return SDValue();
Movmsk = DAG.getZExtOrTrunc(Movmsk, DL, NumElts > 32 ? MVT::i64 : MVT::i32);
} else {
- // Bail with AVX512VL (which uses predicate registers).
- if (Subtarget.hasVLX())
- return SDValue();
-
+ // FIXME: Better handling of k-registers or 512-bit vectors?
unsigned MatchSizeInBits = Match.getValueSizeInBits();
if (!(MatchSizeInBits == 128 ||
(MatchSizeInBits == 256 && Subtarget.hasAVX())))
diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
index 02c9f4f17fd4..4348edf5b7c2 100644
--- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
@@ -28,9 +28,11 @@ define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
; AVX512-LABEL: test_v2f64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vandpd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vmovmskpd %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $3, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negq %rax
; AVX512-NEXT: retq
%c = fcmp ogt <2 x double> %a0, %a1
%s = sext <2 x i1> %c to <2 x i64>
@@ -67,11 +69,11 @@ define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
; AVX512-LABEL: test_v4f64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
-; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vandpd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vandpd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vmovmskpd %ymm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $15, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negq %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = fcmp ogt <4 x double> %a0, %a1
@@ -115,12 +117,11 @@ define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: cltq
+; AVX512-NEXT: vmovmskps %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $15, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negq %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = fcmp ogt <4 x double> %a0, %a1
@@ -158,11 +159,11 @@ define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) {
; AVX512-LABEL: test_v4f32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vmovmskps %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $15, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negl %eax
; AVX512-NEXT: retq
%c = fcmp ogt <4 x float> %a0, %a1
%s = sext <4 x i1> %c to <4 x i32>
@@ -201,13 +202,11 @@ define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
; AVX512-LABEL: test_v8f32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
-; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vmovmskps %ymm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $255, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negl %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = fcmp ogt <8 x float> %a0, %a1
@@ -252,14 +251,11 @@ define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k0
; AVX512-NEXT: vpmovm2w %k0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: cwtl
+; AVX512-NEXT: vpmovmskb %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negl %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = fcmp ogt <8 x float> %a0, %a1
@@ -299,9 +295,11 @@ define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
; AVX512-LABEL: test_v2i64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vmovmskpd %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $3, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negq %rax
; AVX512-NEXT: retq
%c = icmp sgt <2 x i64> %a0, %a1
%s = sext <2 x i1> %c to <2 x i64>
@@ -353,11 +351,11 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
; AVX512-LABEL: test_v4i64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vmovmskpd %ymm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $15, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negq %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = icmp sgt <4 x i64> %a0, %a1
@@ -416,12 +414,11 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: cltq
+; AVX512-NEXT: vmovmskps %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $15, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negq %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = icmp sgt <4 x i64> %a0, %a1
@@ -459,11 +456,11 @@ define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) {
; AVX512-LABEL: test_v4i32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vmovmskps %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $15, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negl %eax
; AVX512-NEXT: retq
%c = icmp sgt <4 x i32> %a0, %a1
%s = sext <4 x i1> %c to <4 x i32>
@@ -517,13 +514,11 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
; AVX512-LABEL: test_v8i32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vmovmskps %ymm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $255, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negl %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = icmp sgt <8 x i32> %a0, %a1
@@ -583,14 +578,11 @@ define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
; AVX512-NEXT: vpmovm2w %k0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: cwtl
+; AVX512-NEXT: vpmovmskb %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negl %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = icmp sgt <8 x i32> %a0, %a1
@@ -632,13 +624,11 @@ define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
; AVX512-LABEL: test_v8i16_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vpmovmskb %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negl %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
%c = icmp sgt <8 x i16> %a0, %a1
@@ -698,15 +688,11 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
; AVX512-LABEL: test_v16i16_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vpmovmskb %ymm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $-1, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negl %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -772,15 +758,10 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
; AVX512-NEXT: vpmovm2b %k0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vpmovmskb %xmm0, %eax
+; AVX512-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negb %al
; AVX512-NEXT: movsbl %al, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
@@ -822,16 +803,10 @@ define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
; AVX512-LABEL: test_v16i8_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
-; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: vpmovmskb %xmm0, %eax
+; AVX512-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negb %al
; AVX512-NEXT: retq
%c = icmp sgt <16 x i8> %a0, %a1
%s = sext <16 x i1> %c to <16 x i8>
@@ -886,18 +861,10 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
; AVX512-LABEL: test_v32i8_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
-; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: vpmovmskb %ymm0, %eax
+; AVX512-NEXT: cmpl $-1, %eax
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negb %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = icmp sgt <32 x i8> %a0, %a1
diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
index 084de61dd086..b3443b9707b7 100644
--- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
@@ -24,9 +24,9 @@ define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
; AVX512-LABEL: test_v2f64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vorpd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vmovmskpd %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbq %rax, %rax
; AVX512-NEXT: retq
%c = fcmp ogt <2 x double> %a0, %a1
%s = sext <2 x i1> %c to <2 x i64>
@@ -59,11 +59,9 @@ define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
; AVX512-LABEL: test_v4f64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
-; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vorpd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vorpd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vmovmskpd %ymm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbq %rax, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = fcmp ogt <4 x double> %a0, %a1
@@ -103,12 +101,9 @@ define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: cltq
+; AVX512-NEXT: vmovmskps %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbq %rax, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = fcmp ogt <4 x double> %a0, %a1
@@ -142,11 +137,9 @@ define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) {
; AVX512-LABEL: test_v4f32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vmovmskps %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: retq
%c = fcmp ogt <4 x float> %a0, %a1
%s = sext <4 x i1> %c to <4 x i32>
@@ -181,13 +174,9 @@ define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
; AVX512-LABEL: test_v8f32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
-; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vmovmskps %ymm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = fcmp ogt <8 x float> %a0, %a1
@@ -228,14 +217,9 @@ define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k0
; AVX512-NEXT: vpmovm2w %k0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: cwtl
+; AVX512-NEXT: vpmovmskb %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = fcmp ogt <8 x float> %a0, %a1
@@ -271,9 +255,9 @@ define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
; AVX512-LABEL: test_v2i64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vmovmskpd %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbq %rax, %rax
; AVX512-NEXT: retq
%c = icmp sgt <2 x i64> %a0, %a1
%s = sext <2 x i1> %c to <2 x i64>
@@ -319,11 +303,9 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
; AVX512-LABEL: test_v4i64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vmovmskpd %ymm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbq %rax, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = icmp sgt <4 x i64> %a0, %a1
@@ -376,12 +358,9 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: cltq
+; AVX512-NEXT: vmovmskps %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbq %rax, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = icmp sgt <4 x i64> %a0, %a1
@@ -415,11 +394,9 @@ define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) {
; AVX512-LABEL: test_v4i32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vmovmskps %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: retq
%c = icmp sgt <4 x i32> %a0, %a1
%s = sext <4 x i1> %c to <4 x i32>
@@ -467,13 +444,9 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
; AVX512-LABEL: test_v8i32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vmovmskps %ymm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = icmp sgt <8 x i32> %a0, %a1
@@ -527,14 +500,9 @@ define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
; AVX512-NEXT: vpmovm2w %k0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: cwtl
+; AVX512-NEXT: vpmovmskb %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = icmp sgt <8 x i32> %a0, %a1
@@ -572,13 +540,9 @@ define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
; AVX512-LABEL: test_v8i16_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vpmovmskb %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
%c = icmp sgt <8 x i16> %a0, %a1
@@ -632,15 +596,9 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
; AVX512-LABEL: test_v16i16_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vpmovmskb %ymm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -700,16 +658,9 @@ define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
; AVX512-NEXT: vpmovm2b %k0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
-; AVX512-NEXT: movsbl %al, %eax
+; AVX512-NEXT: vpmovmskb %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -750,15 +701,9 @@ define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
; AVX512-LABEL: test_v16i8_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vpmovmskb %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%c = icmp sgt <16 x i8> %a0, %a1
@@ -814,17 +759,9 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
; AVX512-LABEL: test_v32i8_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vpmovmskb %ymm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
More information about the llvm-commits
mailing list