[llvm] 3a0c1d5 - [X86] combineSetCCMOVMSK - fold anyof/noneof movmskps/movmskpd -> testps/testpd

Simon Pilgrim via llvm-commits <llvm-commits at lists.llvm.org>
Fri Apr 14 07:37:58 PDT 2023


Author: Simon Pilgrim
Date: 2023-04-14T15:37:40+01:00
New Revision: 3a0c1d5ab9f938f615308436fb9572b3fdab7f24

URL: https://github.com/llvm/llvm-project/commit/3a0c1d5ab9f938f615308436fb9572b3fdab7f24
DIFF: https://github.com/llvm/llvm-project/commit/3a0c1d5ab9f938f615308436fb9572b3fdab7f24.diff

LOG: [X86] combineSetCCMOVMSK - fold anyof/noneof movmskps/movmskpd -> testps/testpd

Another part of Issue #60007
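
For context, a minimal C sketch of the kind of any-of/none-of sign-bit check
this combine targets (illustrative only, not taken from this commit's tests;
function names are made up). With AVX enabled, the compare-plus-movemask
idiom below is the pattern that can now lower to a single vtestps instead of
vmovmskps + testl:

  #include <immintrin.h>
  #include <stdbool.h>

  /* "Is any element of v less than the matching element of w?"
     (any-of form: movemask != 0) */
  bool any_lt(__m256 v, __m256 w) {
    __m256 cmp = _mm256_cmp_ps(v, w, _CMP_LT_OQ); /* all-ones per true lane */
    return _mm256_movemask_ps(cmp) != 0;
  }

  /* "Is no element of v NaN?" (none-of form: movemask == 0) */
  bool none_nan(__m256 v) {
    __m256 cmp = _mm256_cmp_ps(v, v, _CMP_UNORD_Q); /* true where v is NaN */
    return _mm256_movemask_ps(cmp) == 0;
  }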

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/avx-win64.ll
    llvm/test/CodeGen/X86/bitcast-vector-bool.ll
    llvm/test/CodeGen/X86/combine-movmsk-avx.ll
    llvm/test/CodeGen/X86/combine-movmsk.ll
    llvm/test/CodeGen/X86/movmsk-cmp.ll
    llvm/test/CodeGen/X86/vector-compare-any_of.ll
    llvm/test/CodeGen/X86/vector-reduce-or-bool.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 059de932f765..d7bccd6e334c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47656,6 +47656,17 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
     }
   }
 
+  // MOVMSKPS(V) !=/== 0 -> TESTPS(V,V)
+  // MOVMSKPD(V) !=/== 0 -> TESTPD(V,V)
+  // iff every element is referenced.
+  if (NumElts <= CmpBits && IsAnyOf && Subtarget.hasAVX() && IsOneUse &&
+      (NumEltBits == 32 || NumEltBits == 64)) {
+    MVT FloatSVT = MVT::getFloatingPointVT(NumEltBits);
+    MVT FloatVT = MVT::getVectorVT(FloatSVT, NumElts);
+    SDValue V = DAG.getBitcast(FloatVT, Vec);
+    return DAG.getNode(X86ISD::TESTP, SDLoc(EFLAGS), MVT::i32, V, V);
+  }
+
   return SDValue();
 }
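
As a side note on why the fold is sound: with identical operands,
(V)TESTPS/(V)TESTPD set ZF iff the AND of the packed sign bits is zero,
i.e. exactly when MOVMSKPS/MOVMSKPD would return 0, so any-of/none-of
checks of the movmsk result map directly onto the testp flags. A small
standalone check of that equivalence, assuming AVX intrinsics and a
-mavx build (not part of this commit):

  #include <assert.h>
  #include <immintrin.h>

  int main(void) {
    __m128 some_neg = _mm_set_ps(1.0f, -2.0f, 3.0f, 4.0f); /* one sign bit set */
    __m128 none_neg = _mm_set_ps(1.0f, 2.0f, 3.0f, 4.0f);  /* no sign bits set */

    /* _mm_testz_ps(v, v) returns the ZF result of vtestps v, v:
       1 iff no element of v has its sign bit set. */
    assert((_mm_movemask_ps(some_neg) != 0) == !_mm_testz_ps(some_neg, some_neg));
    assert((_mm_movemask_ps(none_neg) != 0) == !_mm_testz_ps(none_neg, none_neg));
    return 0;
  }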
 

diff --git a/llvm/test/CodeGen/X86/avx-win64.ll b/llvm/test/CodeGen/X86/avx-win64.ll
index 64bc398a97ea..239242eb7867 100644
--- a/llvm/test/CodeGen/X86/avx-win64.ll
+++ b/llvm/test/CodeGen/X86/avx-win64.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-pc-win32"
 
 ; CHECK: f___vyf
 ; CHECK: pushq %rbp
-; CHECK: vmovmsk
+; CHECK: vtestps
 ; CHECK: vmovaps %ymm{{.*}}(%r
 ; CHECK: vmovaps %ymm{{.*}}(%r
 ; CHECK: call

diff --git a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
index 049754119702..f903e3696042 100644
--- a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
+++ b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
@@ -1450,9 +1450,7 @@ define i8 @PR59526(<8 x i32> %a, <8 x i32> %b, ptr %mask) {
 ; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT:    vandps (%rdi), %ymm0, %ymm0
-; AVX1-NEXT:    vmovmskps %ymm0, %eax
-; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    vtestps (%rdi), %ymm0
 ; AVX1-NEXT:    setne %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -1460,9 +1458,7 @@ define i8 @PR59526(<8 x i32> %a, <8 x i32> %b, ptr %mask) {
 ; AVX2-LABEL: PR59526:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpand (%rdi), %ymm0, %ymm0
-; AVX2-NEXT:    vmovmskps %ymm0, %eax
-; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    vtestps (%rdi), %ymm0
 ; AVX2-NEXT:    setne %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq

diff --git a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
index 3277c89a3e34..70774ec36cec 100644
--- a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
+++ b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
@@ -12,8 +12,7 @@ define i1 @movmskps_noneof_bitcast_v4f64(<4 x double> %a0) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
 ; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT:    vmovmskpd %ymm0, %eax
-; CHECK-NEXT:    testl %eax, %eax
+; CHECK-NEXT:    vtestpd %ymm0, %ymm0
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
@@ -139,10 +138,10 @@ define i32 @movmskps_concat_v4f32(<4 x float> %a0, <4 x float> %a1)  {
 ; CHECK-LABEL: movmskps_concat_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vmovmskps %xmm0, %ecx
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    negl %ecx
-; CHECK-NEXT:    sbbl %eax, %eax
+; CHECK-NEXT:    vtestps %xmm0, %xmm0
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    negl %eax
 ; CHECK-NEXT:    retq
   %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %2 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %1)

diff --git a/llvm/test/CodeGen/X86/combine-movmsk.ll b/llvm/test/CodeGen/X86/combine-movmsk.ll
index 3cdde3d9ea4a..9ad3c16dcf0f 100644
--- a/llvm/test/CodeGen/X86/combine-movmsk.ll
+++ b/llvm/test/CodeGen/X86/combine-movmsk.ll
@@ -24,8 +24,7 @@ define i1 @movmskps_noneof_bitcast_v2f64(<2 x double> %a0) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vcmpeqpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vmovmskpd %xmm0, %eax
-; AVX-NEXT:    testl %eax, %eax
+; AVX-NEXT:    vtestpd %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
   %1 = fcmp oeq <2 x double> zeroinitializer, %a0
@@ -80,8 +79,7 @@ define i1 @pmovmskb_noneof_bitcast_v2i64(<2 x i64> %a0) {
 ;
 ; AVX-LABEL: pmovmskb_noneof_bitcast_v2i64:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovmskpd %xmm0, %eax
-; AVX-NEXT:    testl %eax, %eax
+; AVX-NEXT:    vtestpd %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
   %1 = icmp sgt <2 x i64> zeroinitializer, %a0
@@ -136,8 +134,7 @@ define i1 @pmovmskb_noneof_bitcast_v4f32(<4 x float> %a0) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskps %xmm0, %eax
-; AVX-NEXT:    testl %eax, %eax
+; AVX-NEXT:    vtestps %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
   %1 = fcmp oeq <4 x float> %a0, zeroinitializer

diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index e7f9b3639268..8473e99b709a 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -575,8 +575,7 @@ define i1 @allzeros_v4i32_sign(<4 x i32> %arg) {
 ;
 ; AVX-LABEL: allzeros_v4i32_sign:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovmskps %xmm0, %eax
-; AVX-NEXT:    testl %eax, %eax
+; AVX-NEXT:    vtestps %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
   %tmp = icmp slt <4 x i32> %arg, zeroinitializer
@@ -619,8 +618,7 @@ define i1 @allzeros_v8i32_sign(<8 x i32> %arg) {
 ;
 ; AVX-LABEL: allzeros_v8i32_sign:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovmskps %ymm0, %eax
-; AVX-NEXT:    testl %eax, %eax
+; AVX-NEXT:    vtestps %ymm0, %ymm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
@@ -779,8 +777,7 @@ define i1 @allzeros_v4i64_sign(<4 x i64> %arg) {
 ;
 ; AVX-LABEL: allzeros_v4i64_sign:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovmskpd %ymm0, %eax
-; AVX-NEXT:    testl %eax, %eax
+; AVX-NEXT:    vtestpd %ymm0, %ymm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
@@ -865,8 +862,7 @@ define i1 @allzeros_v8i64_sign(<8 x i64> %arg) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vmovmskps %xmm0, %eax
-; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    vtestps %xmm0, %xmm0
 ; AVX1-NEXT:    sete %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -874,8 +870,7 @@ define i1 @allzeros_v8i64_sign(<8 x i64> %arg) {
 ; AVX2-LABEL: allzeros_v8i64_sign:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovmskps %ymm0, %eax
-; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    vtestps %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -3347,8 +3342,7 @@ define i1 @allzeros_v8f32_nnan(<8 x float> %a0) {
 ; AVX1OR2:       # %bb.0:
 ; AVX1OR2-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; AVX1OR2-NEXT:    vcmpneqps %ymm1, %ymm0, %ymm0
-; AVX1OR2-NEXT:    vmovmskps %ymm0, %eax
-; AVX1OR2-NEXT:    testl %eax, %eax
+; AVX1OR2-NEXT:    vtestps %ymm0, %ymm0
 ; AVX1OR2-NEXT:    setne %al
 ; AVX1OR2-NEXT:    vzeroupper
 ; AVX1OR2-NEXT:    retq
@@ -3876,8 +3870,7 @@ define i1 @movmsk_or_v2f64(<2 x double> %x, <2 x double> %y) {
 ; AVX1OR2-LABEL: movmsk_or_v2f64:
 ; AVX1OR2:       # %bb.0:
 ; AVX1OR2-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0
-; AVX1OR2-NEXT:    vmovmskpd %xmm0, %eax
-; AVX1OR2-NEXT:    testl %eax, %eax
+; AVX1OR2-NEXT:    vtestpd %xmm0, %xmm0
 ; AVX1OR2-NEXT:    setne %al
 ; AVX1OR2-NEXT:    retq
 ;

diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
index 1fe8d21c4236..9dd4302f9a74 100644
--- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
@@ -18,10 +18,10 @@ define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
 ; AVX-LABEL: test_v2f64_sext:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vmovmskpd %xmm0, %ecx
 ; AVX-NEXT:    xorl %eax, %eax
-; AVX-NEXT:    negl %ecx
-; AVX-NEXT:    sbbq %rax, %rax
+; AVX-NEXT:    vtestpd %xmm0, %xmm0
+; AVX-NEXT:    setne %al
+; AVX-NEXT:    negq %rax
 ; AVX-NEXT:    retq
   %c = fcmp ogt <2 x double> %a0, %a1
   %s = sext <2 x i1> %c to <2 x i64>
@@ -46,10 +46,10 @@ define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
 ; AVX-LABEL: test_v4f64_sext:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
-; AVX-NEXT:    vmovmskpd %ymm0, %ecx
 ; AVX-NEXT:    xorl %eax, %eax
-; AVX-NEXT:    negl %ecx
-; AVX-NEXT:    sbbq %rax, %rax
+; AVX-NEXT:    vtestpd %ymm0, %ymm0
+; AVX-NEXT:    setne %al
+; AVX-NEXT:    negq %rax
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
   %c = fcmp ogt <4 x double> %a0, %a1
@@ -79,10 +79,10 @@ define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
 ; AVX1OR2-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
 ; AVX1OR2-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1OR2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX1OR2-NEXT:    vmovmskps %xmm0, %ecx
 ; AVX1OR2-NEXT:    xorl %eax, %eax
-; AVX1OR2-NEXT:    negl %ecx
-; AVX1OR2-NEXT:    sbbq %rax, %rax
+; AVX1OR2-NEXT:    vtestps %xmm0, %xmm0
+; AVX1OR2-NEXT:    setne %al
+; AVX1OR2-NEXT:    negq %rax
 ; AVX1OR2-NEXT:    vzeroupper
 ; AVX1OR2-NEXT:    retq
 ;
@@ -91,10 +91,10 @@ define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
 ; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT:    vmovmskps %xmm0, %ecx
 ; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    negl %ecx
-; AVX512-NEXT:    sbbq %rax, %rax
+; AVX512-NEXT:    vtestps %xmm0, %xmm0
+; AVX512-NEXT:    setne %al
+; AVX512-NEXT:    negq %rax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %c = fcmp ogt <4 x double> %a0, %a1
@@ -121,10 +121,10 @@ define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) {
 ; AVX-LABEL: test_v4f32_sext:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vmovmskps %xmm0, %ecx
 ; AVX-NEXT:    xorl %eax, %eax
-; AVX-NEXT:    negl %ecx
-; AVX-NEXT:    sbbl %eax, %eax
+; AVX-NEXT:    vtestps %xmm0, %xmm0
+; AVX-NEXT:    setne %al
+; AVX-NEXT:    negl %eax
 ; AVX-NEXT:    retq
   %c = fcmp ogt <4 x float> %a0, %a1
   %s = sext <4 x i1> %c to <4 x i32>
@@ -151,10 +151,10 @@ define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
 ; AVX-LABEL: test_v8f32_sext:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
-; AVX-NEXT:    vmovmskps %ymm0, %ecx
 ; AVX-NEXT:    xorl %eax, %eax
-; AVX-NEXT:    negl %ecx
-; AVX-NEXT:    sbbl %eax, %eax
+; AVX-NEXT:    vtestps %ymm0, %ymm0
+; AVX-NEXT:    setne %al
+; AVX-NEXT:    negl %eax
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
   %c = fcmp ogt <8 x float> %a0, %a1
@@ -246,10 +246,10 @@ define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
 ; AVX-LABEL: test_v2i64_sext:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskpd %xmm0, %ecx
 ; AVX-NEXT:    xorl %eax, %eax
-; AVX-NEXT:    negl %ecx
-; AVX-NEXT:    sbbq %rax, %rax
+; AVX-NEXT:    vtestpd %xmm0, %xmm0
+; AVX-NEXT:    setne %al
+; AVX-NEXT:    negq %rax
 ; AVX-NEXT:    retq
   %c = icmp sgt <2 x i64> %a0, %a1
   %s = sext <2 x i1> %c to <2 x i64>
@@ -304,30 +304,30 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vmovmskpd %xmm0, %ecx
 ; AVX1-NEXT:    xorl %eax, %eax
-; AVX1-NEXT:    negl %ecx
-; AVX1-NEXT:    sbbq %rax, %rax
+; AVX1-NEXT:    vtestpd %xmm0, %xmm0
+; AVX1-NEXT:    setne %al
+; AVX1-NEXT:    negq %rax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_v4i64_sext:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovmskpd %ymm0, %ecx
 ; AVX2-NEXT:    xorl %eax, %eax
-; AVX2-NEXT:    negl %ecx
-; AVX2-NEXT:    sbbq %rax, %rax
+; AVX2-NEXT:    vtestpd %ymm0, %ymm0
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    negq %rax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: test_v4i64_sext:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    vmovmskpd %ymm0, %ecx
 ; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    negl %ecx
-; AVX512-NEXT:    sbbq %rax, %rax
+; AVX512-NEXT:    vtestpd %ymm0, %ymm0
+; AVX512-NEXT:    setne %al
+; AVX512-NEXT:    negq %rax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %c = icmp sgt <4 x i64> %a0, %a1
@@ -385,10 +385,10 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vmovmskps %xmm0, %ecx
 ; AVX1-NEXT:    xorl %eax, %eax
-; AVX1-NEXT:    negl %ecx
-; AVX1-NEXT:    sbbq %rax, %rax
+; AVX1-NEXT:    vtestps %xmm0, %xmm0
+; AVX1-NEXT:    setne %al
+; AVX1-NEXT:    negq %rax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -397,10 +397,10 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
 ; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovmskps %xmm0, %ecx
 ; AVX2-NEXT:    xorl %eax, %eax
-; AVX2-NEXT:    negl %ecx
-; AVX2-NEXT:    sbbq %rax, %rax
+; AVX2-NEXT:    vtestps %xmm0, %xmm0
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    negq %rax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
@@ -409,10 +409,10 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
 ; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT:    vmovmskps %xmm0, %ecx
 ; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    negl %ecx
-; AVX512-NEXT:    sbbq %rax, %rax
+; AVX512-NEXT:    vtestps %xmm0, %xmm0
+; AVX512-NEXT:    setne %al
+; AVX512-NEXT:    negq %rax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %c = icmp sgt <4 x i64> %a0, %a1
@@ -439,10 +439,10 @@ define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) {
 ; AVX-LABEL: test_v4i32_sext:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskps %xmm0, %ecx
 ; AVX-NEXT:    xorl %eax, %eax
-; AVX-NEXT:    negl %ecx
-; AVX-NEXT:    sbbl %eax, %eax
+; AVX-NEXT:    vtestps %xmm0, %xmm0
+; AVX-NEXT:    setne %al
+; AVX-NEXT:    negl %eax
 ; AVX-NEXT:    retq
   %c = icmp sgt <4 x i32> %a0, %a1
   %s = sext <4 x i1> %c to <4 x i32>
@@ -473,30 +473,30 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
 ; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vmovmskps %xmm0, %ecx
 ; AVX1-NEXT:    xorl %eax, %eax
-; AVX1-NEXT:    negl %ecx
-; AVX1-NEXT:    sbbl %eax, %eax
+; AVX1-NEXT:    vtestps %xmm0, %xmm0
+; AVX1-NEXT:    setne %al
+; AVX1-NEXT:    negl %eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_v8i32_sext:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovmskps %ymm0, %ecx
 ; AVX2-NEXT:    xorl %eax, %eax
-; AVX2-NEXT:    negl %ecx
-; AVX2-NEXT:    sbbl %eax, %eax
+; AVX2-NEXT:    vtestps %ymm0, %ymm0
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    negl %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: test_v8i32_sext:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    vmovmskps %ymm0, %ecx
 ; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    negl %ecx
-; AVX512-NEXT:    sbbl %eax, %eax
+; AVX512-NEXT:    vtestps %ymm0, %ymm0
+; AVX512-NEXT:    setne %al
+; AVX512-NEXT:    negl %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %c = icmp sgt <8 x i32> %a0, %a1
@@ -843,8 +843,7 @@ define i1 @bool_reduction_v2f64(<2 x double> %x, <2 x double> %y) {
 ; AVX1OR2-LABEL: bool_reduction_v2f64:
 ; AVX1OR2:       # %bb.0:
 ; AVX1OR2-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX1OR2-NEXT:    vmovmskpd %xmm0, %eax
-; AVX1OR2-NEXT:    testl %eax, %eax
+; AVX1OR2-NEXT:    vtestpd %xmm0, %xmm0
 ; AVX1OR2-NEXT:    setne %al
 ; AVX1OR2-NEXT:    retq
 ;
@@ -874,8 +873,7 @@ define i1 @bool_reduction_v4f32(<4 x float> %x, <4 x float> %y) {
 ; AVX1OR2-LABEL: bool_reduction_v4f32:
 ; AVX1OR2:       # %bb.0:
 ; AVX1OR2-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
-; AVX1OR2-NEXT:    vmovmskps %xmm0, %eax
-; AVX1OR2-NEXT:    testl %eax, %eax
+; AVX1OR2-NEXT:    vtestps %xmm0, %xmm0
 ; AVX1OR2-NEXT:    setne %al
 ; AVX1OR2-NEXT:    retq
 ;
@@ -909,8 +907,7 @@ define i1 @bool_reduction_v4f64(<4 x double> %x, <4 x double> %y) {
 ; AVX1OR2-LABEL: bool_reduction_v4f64:
 ; AVX1OR2:       # %bb.0:
 ; AVX1OR2-NEXT:    vcmplepd %ymm0, %ymm1, %ymm0
-; AVX1OR2-NEXT:    vmovmskpd %ymm0, %eax
-; AVX1OR2-NEXT:    testl %eax, %eax
+; AVX1OR2-NEXT:    vtestpd %ymm0, %ymm0
 ; AVX1OR2-NEXT:    setne %al
 ; AVX1OR2-NEXT:    vzeroupper
 ; AVX1OR2-NEXT:    retq
@@ -946,8 +943,7 @@ define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) {
 ; AVX1OR2-LABEL: bool_reduction_v8f32:
 ; AVX1OR2:       # %bb.0:
 ; AVX1OR2-NEXT:    vcmpneqps %ymm1, %ymm0, %ymm0
-; AVX1OR2-NEXT:    vmovmskps %ymm0, %eax
-; AVX1OR2-NEXT:    testl %eax, %eax
+; AVX1OR2-NEXT:    vtestps %ymm0, %ymm0
 ; AVX1OR2-NEXT:    setne %al
 ; AVX1OR2-NEXT:    vzeroupper
 ; AVX1OR2-NEXT:    retq
@@ -1158,8 +1154,7 @@ define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vmovmskpd %xmm0, %eax
-; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    vtestpd %xmm0, %xmm0
 ; AVX1-NEXT:    setne %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -1167,8 +1162,7 @@ define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
 ; AVX2-LABEL: bool_reduction_v4i64:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
-; AVX2-NEXT:    vmovmskpd %ymm0, %eax
-; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    vtestpd %ymm0, %ymm0
 ; AVX2-NEXT:    setne %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1230,8 +1224,7 @@ define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
 ; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm1
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vmovmskps %xmm0, %eax
-; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    vtestps %xmm0, %xmm0
 ; AVX1-NEXT:    setne %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -1240,8 +1233,7 @@ define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm1
 ; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovmskps %ymm0, %eax
-; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    vtestps %ymm0, %ymm0
 ; AVX2-NEXT:    setne %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq

diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
index 6a434343fce7..815ba225ccbf 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
@@ -550,8 +550,7 @@ define i1 @icmp0_v2i64_v2i1(<2 x i64>) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskpd %xmm0, %eax
-; AVX-NEXT:    testl %eax, %eax
+; AVX-NEXT:    vtestpd %xmm0, %xmm0
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
 ;
@@ -601,8 +600,7 @@ define i1 @icmp0_v4i32_v4i1(<4 x i32>) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskps %xmm0, %eax
-; AVX-NEXT:    testl %eax, %eax
+; AVX-NEXT:    vtestps %xmm0, %xmm0
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
 ;
@@ -772,8 +770,7 @@ define i1 @icmp0_v4i64_v4i1(<4 x i64>) {
 ; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vmovmskpd %xmm0, %eax
-; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    vtestpd %xmm0, %xmm0
 ; AVX1-NEXT:    setne %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -782,8 +779,7 @@ define i1 @icmp0_v4i64_v4i1(<4 x i64>) {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovmskpd %ymm0, %eax
-; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    vtestpd %ymm0, %ymm0
 ; AVX2-NEXT:    setne %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -840,8 +836,7 @@ define i1 @icmp0_v8i32_v8i1(<8 x i32>) {
 ; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vmovmskps %xmm0, %eax
-; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    vtestps %xmm0, %xmm0
 ; AVX1-NEXT:    setne %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -850,8 +845,7 @@ define i1 @icmp0_v8i32_v8i1(<8 x i32>) {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovmskps %ymm0, %eax
-; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    vtestps %ymm0, %ymm0
 ; AVX2-NEXT:    setne %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1073,8 +1067,7 @@ define i1 @icmp0_v8i64_v8i1(<8 x i64>) {
 ; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vmovmskps %xmm0, %eax
-; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    vtestps %xmm0, %xmm0
 ; AVX1-NEXT:    setne %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -1085,8 +1078,7 @@ define i1 @icmp0_v8i64_v8i1(<8 x i64>) {
 ; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovmskps %ymm0, %eax
-; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    vtestps %ymm0, %ymm0
 ; AVX2-NEXT:    setne %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1364,8 +1356,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>, <2 x i64>) {
 ; AVX-LABEL: icmp_v2i64_v2i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskpd %xmm0, %eax
-; AVX-NEXT:    testl %eax, %eax
+; AVX-NEXT:    vtestpd %xmm0, %xmm0
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
 ;
@@ -1415,8 +1406,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>, <4 x i32>) {
 ; AVX-LABEL: icmp_v4i32_v4i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskps %xmm0, %eax
-; AVX-NEXT:    testl %eax, %eax
+; AVX-NEXT:    vtestps %xmm0, %xmm0
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
 ;
@@ -1582,8 +1572,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>, <4 x i64>) {
 ; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vmovmskpd %xmm0, %eax
-; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    vtestpd %xmm0, %xmm0
 ; AVX1-NEXT:    setne %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -1591,8 +1580,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>, <4 x i64>) {
 ; AVX2-LABEL: icmp_v4i64_v4i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovmskpd %ymm0, %eax
-; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    vtestpd %ymm0, %ymm0
 ; AVX2-NEXT:    setne %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1650,8 +1638,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>, <8 x i32>) {
 ; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vmovmskps %xmm0, %eax
-; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    vtestps %xmm0, %xmm0
 ; AVX1-NEXT:    setne %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -1659,8 +1646,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>, <8 x i32>) {
 ; AVX2-LABEL: icmp_v8i32_v8i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovmskps %ymm0, %eax
-; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    vtestps %ymm0, %ymm0
 ; AVX2-NEXT:    setne %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1879,8 +1865,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>, <8 x i64>) {
 ; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vmovmskps %xmm0, %eax
-; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    vtestps %xmm0, %xmm0
 ; AVX1-NEXT:    setne %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -1890,8 +1875,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>, <8 x i64>) {
 ; AVX2-NEXT:    vpcmpeqq %ymm3, %ymm1, %ymm1
 ; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovmskps %ymm0, %eax
-; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    vtestps %ymm0, %ymm0
 ; AVX2-NEXT:    setne %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq


        

