[llvm] ad3a765 - [X86] combineCMP - peek through zero-extensions for X86cmp(zext(x0),0) zero tests (PR38960)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 13 04:38:56 PDT 2022


Author: Simon Pilgrim
Date: 2022-03-13T11:38:40Z
New Revision: ad3a7654dc7dba66dafbdab0c06719174cd2b437

URL: https://github.com/llvm/llvm-project/commit/ad3a7654dc7dba66dafbdab0c06719174cd2b437
DIFF: https://github.com/llvm/llvm-project/commit/ad3a7654dc7dba66dafbdab0c06719174cd2b437.diff

LOG: [X86] combineCMP - peek through zero-extensions for X86cmp(zext(x0),0) zero tests (PR38960)

If we're comparing a value against zero and only the zero flag of the comparison is used, strip away any zero-extension and perform the comparison on the pre-extended value.

Fixes #38308

Differential Revision: https://reviews.llvm.org/D121472

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/comi-flags.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 371255172fcab..d8c4e7b69c153 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -52086,6 +52086,16 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
     }
   }
 
+  // Peek through any zero-extend if we're only testing for a zero result.
+  if (Op.getOpcode() == ISD::ZERO_EXTEND && onlyZeroFlagUsed(SDValue(N, 0))) {
+    SDValue Src = Op.getOperand(0);
+    EVT SrcVT = Src.getValueType();
+    if (SrcVT.getScalarSizeInBits() >= 8 &&
+        DAG.getTargetLoweringInfo().isTypeLegal(SrcVT))
+      return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Src,
+                         DAG.getConstant(0, dl, SrcVT));
+  }
+
   // Look for a truncate.
   if (Op.getOpcode() != ISD::TRUNCATE)
     return SDValue();

diff  --git a/llvm/test/CodeGen/X86/comi-flags.ll b/llvm/test/CodeGen/X86/comi-flags.ll
index a4c14c6012430..097d3943eaf5e 100644
--- a/llvm/test/CodeGen/X86/comi-flags.ll
+++ b/llvm/test/CodeGen/X86/comi-flags.ll
@@ -13,9 +13,7 @@ define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i3
 ; SSE-NEXT:    comiss %xmm1, %xmm0
 ; SSE-NEXT:    setnp %cl
 ; SSE-NEXT:    sete %dl
-; SSE-NEXT:    andb %cl, %dl
-; SSE-NEXT:    movzbl %dl, %ecx
-; SSE-NEXT:    testl %ecx, %ecx
+; SSE-NEXT:    testb %cl, %dl
 ; SSE-NEXT:    cmovnel %esi, %eax
 ; SSE-NEXT:    retq
 ;
@@ -25,9 +23,7 @@ define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i3
 ; AVX-NEXT:    vcomiss %xmm1, %xmm0
 ; AVX-NEXT:    setnp %cl
 ; AVX-NEXT:    sete %dl
-; AVX-NEXT:    andb %cl, %dl
-; AVX-NEXT:    movzbl %dl, %ecx
-; AVX-NEXT:    testl %ecx, %ecx
+; AVX-NEXT:    testb %cl, %dl
 ; AVX-NEXT:    cmovnel %esi, %eax
 ; AVX-NEXT:    retq
   %call = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
@@ -124,26 +120,18 @@ declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone
 define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i32 %a3) {
 ; SSE-LABEL: test_x86_sse_comineq_ss:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    movl %esi, %eax
 ; SSE-NEXT:    comiss %xmm1, %xmm0
-; SSE-NEXT:    setp %cl
-; SSE-NEXT:    setne %dl
-; SSE-NEXT:    orb %cl, %dl
-; SSE-NEXT:    movzbl %dl, %ecx
-; SSE-NEXT:    testl %ecx, %ecx
-; SSE-NEXT:    cmovel %esi, %eax
+; SSE-NEXT:    cmovnel %edi, %eax
+; SSE-NEXT:    cmovpl %edi, %eax
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_x86_sse_comineq_ss:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    movl %esi, %eax
 ; AVX-NEXT:    vcomiss %xmm1, %xmm0
-; AVX-NEXT:    setp %cl
-; AVX-NEXT:    setne %dl
-; AVX-NEXT:    orb %cl, %dl
-; AVX-NEXT:    movzbl %dl, %ecx
-; AVX-NEXT:    testl %ecx, %ecx
-; AVX-NEXT:    cmovel %esi, %eax
+; AVX-NEXT:    cmovnel %edi, %eax
+; AVX-NEXT:    cmovpl %edi, %eax
 ; AVX-NEXT:    retq
   %call = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1)
   %cmp = icmp ne i32 %call, 0
@@ -159,9 +147,7 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i
 ; SSE-NEXT:    ucomiss %xmm1, %xmm0
 ; SSE-NEXT:    setnp %cl
 ; SSE-NEXT:    sete %dl
-; SSE-NEXT:    andb %cl, %dl
-; SSE-NEXT:    movzbl %dl, %ecx
-; SSE-NEXT:    testl %ecx, %ecx
+; SSE-NEXT:    testb %cl, %dl
 ; SSE-NEXT:    cmovnel %esi, %eax
 ; SSE-NEXT:    retq
 ;
@@ -171,9 +157,7 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i
 ; AVX-NEXT:    vucomiss %xmm1, %xmm0
 ; AVX-NEXT:    setnp %cl
 ; AVX-NEXT:    sete %dl
-; AVX-NEXT:    andb %cl, %dl
-; AVX-NEXT:    movzbl %dl, %ecx
-; AVX-NEXT:    testl %ecx, %ecx
+; AVX-NEXT:    testb %cl, %dl
 ; AVX-NEXT:    cmovnel %esi, %eax
 ; AVX-NEXT:    retq
   %call = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
@@ -270,26 +254,18 @@ declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone
 define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i32 %a3) {
 ; SSE-LABEL: test_x86_sse_ucomineq_ss:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    movl %esi, %eax
 ; SSE-NEXT:    ucomiss %xmm1, %xmm0
-; SSE-NEXT:    setp %cl
-; SSE-NEXT:    setne %dl
-; SSE-NEXT:    orb %cl, %dl
-; SSE-NEXT:    movzbl %dl, %ecx
-; SSE-NEXT:    testl %ecx, %ecx
-; SSE-NEXT:    cmovel %esi, %eax
+; SSE-NEXT:    cmovnel %edi, %eax
+; SSE-NEXT:    cmovpl %edi, %eax
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_x86_sse_ucomineq_ss:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    movl %esi, %eax
 ; AVX-NEXT:    vucomiss %xmm1, %xmm0
-; AVX-NEXT:    setp %cl
-; AVX-NEXT:    setne %dl
-; AVX-NEXT:    orb %cl, %dl
-; AVX-NEXT:    movzbl %dl, %ecx
-; AVX-NEXT:    testl %ecx, %ecx
-; AVX-NEXT:    cmovel %esi, %eax
+; AVX-NEXT:    cmovnel %edi, %eax
+; AVX-NEXT:    cmovpl %edi, %eax
 ; AVX-NEXT:    retq
   %call = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1)
   %cmp = icmp ne i32 %call, 0
@@ -309,9 +285,7 @@ define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2,
 ; SSE-NEXT:    comisd %xmm1, %xmm0
 ; SSE-NEXT:    setnp %cl
 ; SSE-NEXT:    sete %dl
-; SSE-NEXT:    andb %cl, %dl
-; SSE-NEXT:    movzbl %dl, %ecx
-; SSE-NEXT:    testl %ecx, %ecx
+; SSE-NEXT:    testb %cl, %dl
 ; SSE-NEXT:    cmovnel %esi, %eax
 ; SSE-NEXT:    retq
 ;
@@ -321,9 +295,7 @@ define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2,
 ; AVX-NEXT:    vcomisd %xmm1, %xmm0
 ; AVX-NEXT:    setnp %cl
 ; AVX-NEXT:    sete %dl
-; AVX-NEXT:    andb %cl, %dl
-; AVX-NEXT:    movzbl %dl, %ecx
-; AVX-NEXT:    testl %ecx, %ecx
+; AVX-NEXT:    testb %cl, %dl
 ; AVX-NEXT:    cmovnel %esi, %eax
 ; AVX-NEXT:    retq
   %call = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -420,26 +392,18 @@ declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readno
 define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2, i32 %a3) {
 ; SSE-LABEL: test_x86_sse2_comineq_sd:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    movl %esi, %eax
 ; SSE-NEXT:    comisd %xmm1, %xmm0
-; SSE-NEXT:    setp %cl
-; SSE-NEXT:    setne %dl
-; SSE-NEXT:    orb %cl, %dl
-; SSE-NEXT:    movzbl %dl, %ecx
-; SSE-NEXT:    testl %ecx, %ecx
-; SSE-NEXT:    cmovel %esi, %eax
+; SSE-NEXT:    cmovnel %edi, %eax
+; SSE-NEXT:    cmovpl %edi, %eax
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_x86_sse2_comineq_sd:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    movl %esi, %eax
 ; AVX-NEXT:    vcomisd %xmm1, %xmm0
-; AVX-NEXT:    setp %cl
-; AVX-NEXT:    setne %dl
-; AVX-NEXT:    orb %cl, %dl
-; AVX-NEXT:    movzbl %dl, %ecx
-; AVX-NEXT:    testl %ecx, %ecx
-; AVX-NEXT:    cmovel %esi, %eax
+; AVX-NEXT:    cmovnel %edi, %eax
+; AVX-NEXT:    cmovpl %edi, %eax
 ; AVX-NEXT:    retq
   %call = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   %cmp = icmp ne i32 %call, 0
@@ -455,9 +419,7 @@ define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2
 ; SSE-NEXT:    ucomisd %xmm1, %xmm0
 ; SSE-NEXT:    setnp %cl
 ; SSE-NEXT:    sete %dl
-; SSE-NEXT:    andb %cl, %dl
-; SSE-NEXT:    movzbl %dl, %ecx
-; SSE-NEXT:    testl %ecx, %ecx
+; SSE-NEXT:    testb %cl, %dl
 ; SSE-NEXT:    cmovnel %esi, %eax
 ; SSE-NEXT:    retq
 ;
@@ -467,9 +429,7 @@ define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2
 ; AVX-NEXT:    vucomisd %xmm1, %xmm0
 ; AVX-NEXT:    setnp %cl
 ; AVX-NEXT:    sete %dl
-; AVX-NEXT:    andb %cl, %dl
-; AVX-NEXT:    movzbl %dl, %ecx
-; AVX-NEXT:    testl %ecx, %ecx
+; AVX-NEXT:    testb %cl, %dl
 ; AVX-NEXT:    cmovnel %esi, %eax
 ; AVX-NEXT:    retq
   %call = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -566,26 +526,18 @@ declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readn
 define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2, i32 %a3) {
 ; SSE-LABEL: test_x86_sse2_ucomineq_sd:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movl %edi, %eax
+; SSE-NEXT:    movl %esi, %eax
 ; SSE-NEXT:    ucomisd %xmm1, %xmm0
-; SSE-NEXT:    setp %cl
-; SSE-NEXT:    setne %dl
-; SSE-NEXT:    orb %cl, %dl
-; SSE-NEXT:    movzbl %dl, %ecx
-; SSE-NEXT:    testl %ecx, %ecx
-; SSE-NEXT:    cmovel %esi, %eax
+; SSE-NEXT:    cmovnel %edi, %eax
+; SSE-NEXT:    cmovpl %edi, %eax
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_x86_sse2_ucomineq_sd:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movl %edi, %eax
+; AVX-NEXT:    movl %esi, %eax
 ; AVX-NEXT:    vucomisd %xmm1, %xmm0
-; AVX-NEXT:    setp %cl
-; AVX-NEXT:    setne %dl
-; AVX-NEXT:    orb %cl, %dl
-; AVX-NEXT:    movzbl %dl, %ecx
-; AVX-NEXT:    testl %ecx, %ecx
-; AVX-NEXT:    cmovel %esi, %eax
+; AVX-NEXT:    cmovnel %edi, %eax
+; AVX-NEXT:    cmovpl %edi, %eax
 ; AVX-NEXT:    retq
   %call = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   %cmp = icmp ne i32 %call, 0
@@ -600,9 +552,7 @@ define void @PR38960_eq(<4 x float> %A, <4 x float> %B) {
 ; SSE-NEXT:    comiss %xmm1, %xmm0
 ; SSE-NEXT:    setnp %al
 ; SSE-NEXT:    sete %cl
-; SSE-NEXT:    andb %al, %cl
-; SSE-NEXT:    movzbl %cl, %eax
-; SSE-NEXT:    testl %eax, %eax
+; SSE-NEXT:    testb %al, %cl
 ; SSE-NEXT:    je .LBB24_1
 ; SSE-NEXT:  # %bb.2: # %if.then
 ; SSE-NEXT:    jmp foo@PLT # TAILCALL
@@ -614,9 +564,7 @@ define void @PR38960_eq(<4 x float> %A, <4 x float> %B) {
 ; AVX-NEXT:    vcomiss %xmm1, %xmm0
 ; AVX-NEXT:    setnp %al
 ; AVX-NEXT:    sete %cl
-; AVX-NEXT:    andb %al, %cl
-; AVX-NEXT:    movzbl %cl, %eax
-; AVX-NEXT:    testl %eax, %eax
+; AVX-NEXT:    testb %al, %cl
 ; AVX-NEXT:    je .LBB24_1
 ; AVX-NEXT:  # %bb.2: # %if.then
 ; AVX-NEXT:    jmp foo@PLT # TAILCALL
@@ -642,8 +590,6 @@ define void @PR38960_neq(<4 x float> %A, <4 x float> %B) {
 ; SSE-NEXT:    setp %al
 ; SSE-NEXT:    setne %cl
 ; SSE-NEXT:    orb %al, %cl
-; SSE-NEXT:    movzbl %cl, %eax
-; SSE-NEXT:    testl %eax, %eax
 ; SSE-NEXT:    je .LBB25_1
 ; SSE-NEXT:  # %bb.2: # %if.then
 ; SSE-NEXT:    jmp foo@PLT # TAILCALL
@@ -656,8 +602,6 @@ define void @PR38960_neq(<4 x float> %A, <4 x float> %B) {
 ; AVX-NEXT:    setp %al
 ; AVX-NEXT:    setne %cl
 ; AVX-NEXT:    orb %al, %cl
-; AVX-NEXT:    movzbl %cl, %eax
-; AVX-NEXT:    testl %eax, %eax
 ; AVX-NEXT:    je .LBB25_1
 ; AVX-NEXT:  # %bb.2: # %if.then
 ; AVX-NEXT:    jmp foo@PLT # TAILCALL


        


More information about the llvm-commits mailing list