[llvm] baadbe0 - [X86] Fold cmpeq/ne(trunc(logic(x)),0) --> cmpeq/ne(logic(x),0)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 12 08:05:56 PDT 2021
Author: Simon Pilgrim
Date: 2021-04-12T16:05:34+01:00
New Revision: baadbe04bf753382728ca6ff3e05227e4c773cec
URL: https://github.com/llvm/llvm-project/commit/baadbe04bf753382728ca6ff3e05227e4c773cec
DIFF: https://github.com/llvm/llvm-project/commit/baadbe04bf753382728ca6ff3e05227e4c773cec.diff
LOG: [X86] Fold cmpeq/ne(trunc(logic(x)),0) --> cmpeq/ne(logic(x),0)
Fixes the issues noted in PR48768, where the and/or/xor instruction had been promoted to avoid i8/i16 partial-dependencies, but the test against zero had not.
We can almost certainly relax this fold to work for any truncation, although it breaks a number of existing folds (notably movmsk folds, which tend to rely on the truncate to determine the demanded bits/elts in the source vector).
There is a reverse combine in TargetLowering.SimplifySetCC so we must wait until after legalization before attempting this.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/2012-08-16-setcc.ll
llvm/test/CodeGen/X86/and-with-overflow.ll
llvm/test/CodeGen/X86/jump_sign.ll
llvm/test/CodeGen/X86/or-with-overflow.ll
llvm/test/CodeGen/X86/xor-with-overflow.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4ecd9f86322b..3d6035afa027 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48166,6 +48166,7 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
}
static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
const SDValue LHS = N->getOperand(0);
@@ -48222,6 +48223,26 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);
if (SDValue AndN = MatchAndCmpEq(RHS, LHS))
return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);
+
+ // cmpeq(trunc(logic(x)),0) --> cmpeq(logic(x),0)
+ // cmpne(trunc(logic(x)),0) --> cmpne(logic(x),0)
+ // iff x upper bits are zero.
+ // TODO: Remove the logic-op only limit?
+ // TODO: Add support for RHS to be truncate as well?
+ if (LHS.getOpcode() == ISD::TRUNCATE &&
+ LHS.getOperand(0).getScalarValueSizeInBits() >= 32 &&
+ isNullConstant(RHS) && !DCI.isBeforeLegalize()) {
+ unsigned LHSOpc = LHS.getOperand(0).getOpcode();
+ EVT SrcVT = LHS.getOperand(0).getValueType();
+ APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
+ OpVT.getScalarSizeInBits());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if ((LHSOpc == ISD::AND || LHSOpc == ISD::OR || LHSOpc == ISD::XOR) &&
+ DAG.MaskedValueIsZero(LHS.getOperand(0), UpperBits) &&
+ TLI.isTypeLegal(LHS.getOperand(0).getValueType()))
+ return DAG.getSetCC(DL, VT, LHS.getOperand(0),
+ DAG.getConstant(0, DL, SrcVT), CC);
+ }
}
}
@@ -50675,7 +50696,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
return combineEXTEND_VECTOR_INREG(N, DAG, DCI, Subtarget);
- case ISD::SETCC: return combineSetCC(N, DAG, Subtarget);
+ case ISD::SETCC: return combineSetCC(N, DAG, DCI, Subtarget);
case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget);
case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
case X86ISD::PACKSS:
diff --git a/llvm/test/CodeGen/X86/2012-08-16-setcc.ll b/llvm/test/CodeGen/X86/2012-08-16-setcc.ll
index f82439a7d82e..89ae5680e3ba 100644
--- a/llvm/test/CodeGen/X86/2012-08-16-setcc.ll
+++ b/llvm/test/CodeGen/X86/2012-08-16-setcc.ll
@@ -6,8 +6,8 @@
define i32 @and_1(i8 zeroext %a, i8 zeroext %b, i32 %x) {
; CHECK-LABEL: and_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: testb %dil, %sil
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andl %esi, %eax
; CHECK-NEXT: cmovnel %edx, %eax
; CHECK-NEXT: retq
%1 = and i8 %b, %a
@@ -19,7 +19,7 @@ define i32 @and_1(i8 zeroext %a, i8 zeroext %b, i32 %x) {
define zeroext i1 @and_2(i8 zeroext %a, i8 zeroext %b) {
; CHECK-LABEL: and_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: testb %dil, %sil
+; CHECK-NEXT: testl %edi, %esi
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%1 = and i8 %b, %a
@@ -31,7 +31,7 @@ define i32 @xor_1(i8 zeroext %a, i8 zeroext %b, i32 %x) {
; CHECK-LABEL: xor_1:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: xorb %dil, %sil
+; CHECK-NEXT: xorl %esi, %edi
; CHECK-NEXT: cmovnel %edx, %eax
; CHECK-NEXT: retq
%1 = xor i8 %b, %a
@@ -43,7 +43,7 @@ define i32 @xor_1(i8 zeroext %a, i8 zeroext %b, i32 %x) {
define zeroext i1 @xor_2(i8 zeroext %a, i8 zeroext %b) {
; CHECK-LABEL: xor_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorb %dil, %sil
+; CHECK-NEXT: xorl %esi, %edi
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%1 = xor i8 %b, %a
diff --git a/llvm/test/CodeGen/X86/and-with-overflow.ll b/llvm/test/CodeGen/X86/and-with-overflow.ll
index f197ae1f2995..d83f91a4f0d2 100644
--- a/llvm/test/CodeGen/X86/and-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/and-with-overflow.ll
@@ -48,7 +48,6 @@ define i8 @and_i8_rr(i8 zeroext %0, i8 zeroext %1) {
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: andl %edi, %eax
-; X64-NEXT: testb %al, %al
; X64-NEXT: cmovel %edi, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
@@ -103,7 +102,6 @@ define i16 @and_i16_rr(i16 zeroext %0, i16 zeroext %1) {
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: andl %edi, %eax
-; X64-NEXT: testw %ax, %ax
; X64-NEXT: cmovel %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/jump_sign.ll b/llvm/test/CodeGen/X86/jump_sign.ll
index b2cfa72235fc..b436b1f35c8f 100644
--- a/llvm/test/CodeGen/X86/jump_sign.ll
+++ b/llvm/test/CodeGen/X86/jump_sign.ll
@@ -388,10 +388,11 @@ define i32 @func_test1(i32 %p1) nounwind uwtable {
; CHECK-LABEL: func_test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl b, %eax
+; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: setb %cl
; CHECK-NEXT: movl a, %eax
-; CHECK-NEXT: testb %al, %cl
+; CHECK-NEXT: testl %eax, %ecx
; CHECK-NEXT: je .LBB18_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: decl %eax
diff --git a/llvm/test/CodeGen/X86/or-with-overflow.ll b/llvm/test/CodeGen/X86/or-with-overflow.ll
index f9a519383fb7..3e39ab65ad2a 100644
--- a/llvm/test/CodeGen/X86/or-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/or-with-overflow.ll
@@ -48,7 +48,6 @@ define i8 @or_i8_rr(i8 zeroext %0, i8 zeroext %1) {
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: orl %edi, %eax
-; X64-NEXT: testb %al, %al
; X64-NEXT: cmovel %edi, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
@@ -103,7 +102,6 @@ define i16 @or_i16_rr(i16 zeroext %0, i16 zeroext %1) {
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: orl %edi, %eax
-; X64-NEXT: testw %ax, %ax
; X64-NEXT: cmovel %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/xor-with-overflow.ll b/llvm/test/CodeGen/X86/xor-with-overflow.ll
index cb48aa259c91..ad2da087929e 100644
--- a/llvm/test/CodeGen/X86/xor-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/xor-with-overflow.ll
@@ -48,7 +48,6 @@ define i8 @xor_i8_rr(i8 zeroext %0, i8 zeroext %1) {
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: xorl %edi, %eax
-; X64-NEXT: testb %al, %al
; X64-NEXT: cmovel %edi, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
@@ -103,7 +102,6 @@ define i16 @xor_i16_rr(i16 zeroext %0, i16 zeroext %1) {
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: xorl %edi, %eax
-; X64-NEXT: testw %ax, %ax
; X64-NEXT: cmovel %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
More information about the llvm-commits
mailing list