[llvm] 772533d - [X86] Narrow i64 compares with constant to i32 when the upper 32-bits are known zero.

Tue Oct 29 11:40:05 PDT 2019

Author: Craig Topper
Date: 2019-10-29T11:38:15-07:00
New Revision: 772533d9214b6e23762847fc7080a4201396fb10

URL: https://github.com/llvm/llvm-project/commit/772533d9214b6e23762847fc7080a4201396fb10
DIFF: https://github.com/llvm/llvm-project/commit/772533d9214b6e23762847fc7080a4201396fb10.diff

LOG: [X86] Narrow i64 compares with constant to i32 when the upper 32-bits are known zero.

This catches some cases. There are probably ways to improve this.
I tried doing it as a combine on the setcc, but that broke
some cases involving flag reuse in place of test.

I renamed isX86CCUnsigned to isX86CCSigned and flipped its
polarity to make it consistent with the similar functions for
ISD::SETCC. This avoids labeling EQ/NE as either signed or unsigned.

Fixes PR43823.

Differential Revision: https://reviews.llvm.org/D69499

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
    llvm/test/CodeGen/X86/cmp.ll
    llvm/test/CodeGen/X86/ctpop-combine.ll
    llvm/test/CodeGen/X86/pr37063.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1606fb8f1085..2feba027b68f 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4643,8 +4643,8 @@ bool X86::isCalleePop(CallingConv::ID CallingConv,
   }
 }
 
-/// Return true if the condition is an unsigned comparison operation.
-static bool isX86CCUnsigned(unsigned X86CC) {
+/// Return true if the condition is a signed comparison operation.
+static bool isX86CCSigned(unsigned X86CC) {
   switch (X86CC) {
   default:
     llvm_unreachable("Invalid integer condition!");
@@ -4654,12 +4654,12 @@ static bool isX86CCUnsigned(unsigned X86CC) {
   case X86::COND_A:
   case X86::COND_BE:
   case X86::COND_AE:
-    return true;
+    return false;
   case X86::COND_G:
   case X86::COND_GE:
   case X86::COND_L:
   case X86::COND_LE:
-    return false;
+    return true;
   }
 }
 
@@ -20154,7 +20154,7 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
     if ((COp0 && !COp0->getAPIntValue().isSignedIntN(8)) ||
         (COp1 && !COp1->getAPIntValue().isSignedIntN(8))) {
       unsigned ExtendOp =
-          isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
+          isX86CCSigned(X86CC) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
       if (X86CC == X86::COND_E || X86CC == X86::COND_NE) {
         // For equality comparisons try to use SIGN_EXTEND if the input was
         // truncate from something with enough sign bits.
@@ -20178,6 +20178,18 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
       Op1 = DAG.getNode(ExtendOp, dl, CmpVT, Op1);
     }
   }
+
+  // Try to shrink i64 compares if the input has enough zero bits.
+  // FIXME: Do this for non-constant compares for constant on LHS?
+  if (CmpVT == MVT::i64 && isa<ConstantSDNode>(Op1) && !isX86CCSigned(X86CC) &&
+      Op0.hasOneUse() && // Hacky way to not break CSE opportunities with sub.
+      cast<ConstantSDNode>(Op1)->getAPIntValue().getActiveBits() <= 32 &&
+      DAG.MaskedValueIsZero(Op0, APInt::getHighBitsSet(64, 32))) {
+    CmpVT = MVT::i32;
+    Op0 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op0);
+    Op1 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op1);
+  }
+
   // Use SUB instead of CMP to enable CSE between SUB and CMP.
   SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32);
   SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1);

diff  --git a/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index bfa40738fee8..f4b1ca511fc0 100644
--- a/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; REQUIRES: asserts
-; RUN: llc < %s -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "6 machinelicm"
+; RUN: llc < %s -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "5 machinelicm"
 ; RUN: llc < %s -mattr=+sse3,+sse4.1 -mcpu=penryn | FileCheck %s
 ; rdar://6627786
 ; rdar://7792037
@@ -33,7 +33,7 @@ define %struct.__vv* @t(%struct.Key* %desc, i64 %p) nounwind ssp {
 ; CHECK-NEXT:    jne LBB0_1
 ; CHECK-NEXT:  ## %bb.2: ## %bb26
 ; CHECK-NEXT:    ## in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    cmpq $1048576, %r14 ## imm = 0x100000
+; CHECK-NEXT:    cmpl $1048576, %r14d ## imm = 0x100000
 ; CHECK-NEXT:    jne LBB0_1
 ; CHECK-NEXT:  ## %bb.3: ## %bb.i
 ; CHECK-NEXT:    ## in Loop: Header=BB0_1 Depth=1

diff  --git a/llvm/test/CodeGen/X86/cmp.ll b/llvm/test/CodeGen/X86/cmp.ll
index d13fecb8ed29..5f63223e69b8 100644
--- a/llvm/test/CodeGen/X86/cmp.ll
+++ b/llvm/test/CodeGen/X86/cmp.ll
@@ -184,7 +184,7 @@ define i32 @test8(i64 %res) nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    shrq $32, %rdi # encoding: [0x48,0xc1,0xef,0x20]
 ; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT:    cmpq $3, %rdi # encoding: [0x48,0x83,0xff,0x03]
+; CHECK-NEXT:    cmpl $3, %edi # encoding: [0x83,0xff,0x03]
 ; CHECK-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 entry:
@@ -224,7 +224,7 @@ define i32 @test11(i64 %l) nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    shrq $47, %rdi # encoding: [0x48,0xc1,0xef,0x2f]
 ; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT:    cmpq $1, %rdi # encoding: [0x48,0x83,0xff,0x01]
+; CHECK-NEXT:    cmpl $1, %edi # encoding: [0x83,0xff,0x01]
 ; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 entry:

diff  --git a/llvm/test/CodeGen/X86/ctpop-combine.ll b/llvm/test/CodeGen/X86/ctpop-combine.ll
index cdef5771f2c2..e60935cc8732 100644
--- a/llvm/test/CodeGen/X86/ctpop-combine.ll
+++ b/llvm/test/CodeGen/X86/ctpop-combine.ll
@@ -116,7 +116,7 @@ define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
 ; POPCOUNT:       # %bb.0:
 ; POPCOUNT-NEXT:    popcntq %rdi, %rcx
 ; POPCOUNT-NEXT:    xorl %eax, %eax
-; POPCOUNT-NEXT:    cmpq $1, %rcx
+; POPCOUNT-NEXT:    cmpl $1, %ecx
 ; POPCOUNT-NEXT:    sete %al
 ; POPCOUNT-NEXT:    retq
 ;
@@ -141,7 +141,7 @@ define i32 @ctpop_ne_one(i64 %x) nounwind readnone {
 ; POPCOUNT:       # %bb.0:
 ; POPCOUNT-NEXT:    popcntq %rdi, %rcx
 ; POPCOUNT-NEXT:    xorl %eax, %eax
-; POPCOUNT-NEXT:    cmpq $1, %rcx
+; POPCOUNT-NEXT:    cmpl $1, %ecx
 ; POPCOUNT-NEXT:    setne %al
 ; POPCOUNT-NEXT:    retq
 ;

diff  --git a/llvm/test/CodeGen/X86/pr37063.ll b/llvm/test/CodeGen/X86/pr37063.ll
index cf5e1fa56700..f7f8d622da55 100644
--- a/llvm/test/CodeGen/X86/pr37063.ll
+++ b/llvm/test/CodeGen/X86/pr37063.ll
@@ -6,9 +6,9 @@ declare void @bar()
 define void @foo(i64*) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %start
-; CHECK-NEXT:    movq (%rdi), %rax
-; CHECK-NEXT:    andl $-2, %eax
-; CHECK-NEXT:    cmpq $4, %rax
+; CHECK-NEXT:    movl (%rdi), %eax
+; CHECK-NEXT:    andl $6, %eax
+; CHECK-NEXT:    cmpl $4, %eax
 ; CHECK-NEXT:    jne .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %bb1
 ; CHECK-NEXT:    retq