[llvm] 772533d - [X86] Narrow i64 compares with constant to i32 when the upper 32-bits are known zero.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 29 11:40:05 PDT 2019
Author: Craig Topper
Date: 2019-10-29T11:38:15-07:00
New Revision: 772533d9214b6e23762847fc7080a4201396fb10
URL: https://github.com/llvm/llvm-project/commit/772533d9214b6e23762847fc7080a4201396fb10
DIFF: https://github.com/llvm/llvm-project/commit/772533d9214b6e23762847fc7080a4201396fb10.diff
LOG: [X86] Narrow i64 compares with constant to i32 when the upper 32-bits are known zero.
This catches some cases. There are probably ways to improve this.
I tried doing it as a combine on the setcc, but that broke
some cases involving flag reuse in place of test.
I renamed isX86CCUnsigned to isX86CCSigned and flipped its
polarity to make it consistent with the similar functions for
ISD::SETCC. This avoids having to classify EQ/NE as either signed or unsigned.
Fixes PR43823.
Differential Revision: https://reviews.llvm.org/D69499
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
llvm/test/CodeGen/X86/cmp.ll
llvm/test/CodeGen/X86/ctpop-combine.ll
llvm/test/CodeGen/X86/pr37063.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1606fb8f1085..2feba027b68f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4643,8 +4643,8 @@ bool X86::isCalleePop(CallingConv::ID CallingConv,
}
}
-/// Return true if the condition is an unsigned comparison operation.
-static bool isX86CCUnsigned(unsigned X86CC) {
+/// Return true if the condition is a signed comparison operation.
+static bool isX86CCSigned(unsigned X86CC) {
switch (X86CC) {
default:
llvm_unreachable("Invalid integer condition!");
@@ -4654,12 +4654,12 @@ static bool isX86CCUnsigned(unsigned X86CC) {
case X86::COND_A:
case X86::COND_BE:
case X86::COND_AE:
- return true;
+ return false;
case X86::COND_G:
case X86::COND_GE:
case X86::COND_L:
case X86::COND_LE:
- return false;
+ return true;
}
}
@@ -20154,7 +20154,7 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
if ((COp0 && !COp0->getAPIntValue().isSignedIntN(8)) ||
(COp1 && !COp1->getAPIntValue().isSignedIntN(8))) {
unsigned ExtendOp =
- isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
+ isX86CCSigned(X86CC) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
if (X86CC == X86::COND_E || X86CC == X86::COND_NE) {
// For equality comparisons try to use SIGN_EXTEND if the input was
// truncate from something with enough sign bits.
@@ -20178,6 +20178,18 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
Op1 = DAG.getNode(ExtendOp, dl, CmpVT, Op1);
}
}
+
+ // Try to shrink i64 compares if the input has enough zero bits.
+ // FIXME: Do this for non-constant compares for constant on LHS?
+ if (CmpVT == MVT::i64 && isa<ConstantSDNode>(Op1) && !isX86CCSigned(X86CC) &&
+ Op0.hasOneUse() && // Hacky way to not break CSE opportunities with sub.
+ cast<ConstantSDNode>(Op1)->getAPIntValue().getActiveBits() <= 32 &&
+ DAG.MaskedValueIsZero(Op0, APInt::getHighBitsSet(64, 32))) {
+ CmpVT = MVT::i32;
+ Op0 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op0);
+ Op1 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op1);
+ }
+
// Use SUB instead of CMP to enable CSE between SUB and CMP.
SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32);
SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1);
diff --git a/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index bfa40738fee8..f4b1ca511fc0 100644
--- a/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; REQUIRES: asserts
-; RUN: llc < %s -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "6 machinelicm"
+; RUN: llc < %s -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "5 machinelicm"
; RUN: llc < %s -mattr=+sse3,+sse4.1 -mcpu=penryn | FileCheck %s
; rdar://6627786
; rdar://7792037
@@ -33,7 +33,7 @@ define %struct.__vv* @t(%struct.Key* %desc, i64 %p) nounwind ssp {
; CHECK-NEXT: jne LBB0_1
; CHECK-NEXT: ## %bb.2: ## %bb26
; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: cmpq $1048576, %r14 ## imm = 0x100000
+; CHECK-NEXT: cmpl $1048576, %r14d ## imm = 0x100000
; CHECK-NEXT: jne LBB0_1
; CHECK-NEXT: ## %bb.3: ## %bb.i
; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1
diff --git a/llvm/test/CodeGen/X86/cmp.ll b/llvm/test/CodeGen/X86/cmp.ll
index d13fecb8ed29..5f63223e69b8 100644
--- a/llvm/test/CodeGen/X86/cmp.ll
+++ b/llvm/test/CodeGen/X86/cmp.ll
@@ -184,7 +184,7 @@ define i32 @test8(i64 %res) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: shrq $32, %rdi # encoding: [0x48,0xc1,0xef,0x20]
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: cmpq $3, %rdi # encoding: [0x48,0x83,0xff,0x03]
+; CHECK-NEXT: cmpl $3, %edi # encoding: [0x83,0xff,0x03]
; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
@@ -224,7 +224,7 @@ define i32 @test11(i64 %l) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: shrq $47, %rdi # encoding: [0x48,0xc1,0xef,0x2f]
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: cmpq $1, %rdi # encoding: [0x48,0x83,0xff,0x01]
+; CHECK-NEXT: cmpl $1, %edi # encoding: [0x83,0xff,0x01]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
diff --git a/llvm/test/CodeGen/X86/ctpop-combine.ll b/llvm/test/CodeGen/X86/ctpop-combine.ll
index cdef5771f2c2..e60935cc8732 100644
--- a/llvm/test/CodeGen/X86/ctpop-combine.ll
+++ b/llvm/test/CodeGen/X86/ctpop-combine.ll
@@ -116,7 +116,7 @@ define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
; POPCOUNT: # %bb.0:
; POPCOUNT-NEXT: popcntq %rdi, %rcx
; POPCOUNT-NEXT: xorl %eax, %eax
-; POPCOUNT-NEXT: cmpq $1, %rcx
+; POPCOUNT-NEXT: cmpl $1, %ecx
; POPCOUNT-NEXT: sete %al
; POPCOUNT-NEXT: retq
;
@@ -141,7 +141,7 @@ define i32 @ctpop_ne_one(i64 %x) nounwind readnone {
; POPCOUNT: # %bb.0:
; POPCOUNT-NEXT: popcntq %rdi, %rcx
; POPCOUNT-NEXT: xorl %eax, %eax
-; POPCOUNT-NEXT: cmpq $1, %rcx
+; POPCOUNT-NEXT: cmpl $1, %ecx
; POPCOUNT-NEXT: setne %al
; POPCOUNT-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/pr37063.ll b/llvm/test/CodeGen/X86/pr37063.ll
index cf5e1fa56700..f7f8d622da55 100644
--- a/llvm/test/CodeGen/X86/pr37063.ll
+++ b/llvm/test/CodeGen/X86/pr37063.ll
@@ -6,9 +6,9 @@ declare void @bar()
define void @foo(i64*) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %start
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: andl $-2, %eax
-; CHECK-NEXT: cmpq $4, %rax
+; CHECK-NEXT: movl (%rdi), %eax
+; CHECK-NEXT: andl $6, %eax
+; CHECK-NEXT: cmpl $4, %eax
; CHECK-NEXT: jne .LBB0_2
; CHECK-NEXT: # %bb.1: # %bb1
; CHECK-NEXT: retq
More information about the llvm-commits
mailing list