[llvm] baadbe0 - [X86] Fold cmpeq/ne(trunc(logic(x)),0) --> cmpeq/ne(logic(x),0)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 12 08:05:56 PDT 2021
Author: Simon Pilgrim
Date: 2021-04-12T16:05:34+01:00
New Revision: baadbe04bf753382728ca6ff3e05227e4c773cec
URL: https://github.com/llvm/llvm-project/commit/baadbe04bf753382728ca6ff3e05227e4c773cec
DIFF: https://github.com/llvm/llvm-project/commit/baadbe04bf753382728ca6ff3e05227e4c773cec.diff
LOG: [X86] Fold cmpeq/ne(trunc(logic(x)),0) --> cmpeq/ne(logic(x),0)
Fixes the issues noted in PR48768, where the and/or/xor instruction had been promoted to avoid i8/i16 partial-dependencies, but the test against zero had not.
We can almost certainly relax this fold to work for any truncation, although it breaks a number of existing folds (notably movmsk folds, which tend to rely on the truncate to determine the demanded bits/elts in the source vector).
There is a reverse combine in TargetLowering.SimplifySetCC so we must wait until after legalization before attempting this.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/2012-08-16-setcc.ll
llvm/test/CodeGen/X86/and-with-overflow.ll
llvm/test/CodeGen/X86/jump_sign.ll
llvm/test/CodeGen/X86/or-with-overflow.ll
llvm/test/CodeGen/X86/xor-with-overflow.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4ecd9f86322b..3d6035afa027 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48166,6 +48166,7 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
}
static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
const SDValue LHS = N->getOperand(0);
@@ -48222,6 +48223,26 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);
if (SDValue AndN = MatchAndCmpEq(RHS, LHS))
return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);
+
+ // cmpeq(trunc(logic(x)),0) --> cmpeq(logic(x),0)
+ // cmpne(trunc(logic(x)),0) --> cmpne(logic(x),0)
+ // iff x upper bits are zero.
+ // TODO: Remove the logic-op only limit?
+ // TODO: Add support for RHS to be truncate as well?
+ if (LHS.getOpcode() == ISD::TRUNCATE &&
+ LHS.getOperand(0).getScalarValueSizeInBits() >= 32 &&
+ isNullConstant(RHS) && !DCI.isBeforeLegalize()) {
+ unsigned LHSOpc = LHS.getOperand(0).getOpcode();
+ EVT SrcVT = LHS.getOperand(0).getValueType();
+ APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
+ OpVT.getScalarSizeInBits());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if ((LHSOpc == ISD::AND || LHSOpc == ISD::OR || LHSOpc == ISD::XOR) &&
+ DAG.MaskedValueIsZero(LHS.getOperand(0), UpperBits) &&
+ TLI.isTypeLegal(LHS.getOperand(0).getValueType()))
+ return DAG.getSetCC(DL, VT, LHS.getOperand(0),
+ DAG.getConstant(0, DL, SrcVT), CC);
+ }
}
}
@@ -50675,7 +50696,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
return combineEXTEND_VECTOR_INREG(N, DAG, DCI, Subtarget);
- case ISD::SETCC: return combineSetCC(N, DAG, Subtarget);
+ case ISD::SETCC: return combineSetCC(N, DAG, DCI, Subtarget);
case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget);
case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
case X86ISD::PACKSS:
diff --git a/llvm/test/CodeGen/X86/2012-08-16-setcc.ll b/llvm/test/CodeGen/X86/2012-08-16-setcc.ll
index f82439a7d82e..89ae5680e3ba 100644
--- a/llvm/test/CodeGen/X86/2012-08-16-setcc.ll
+++ b/llvm/test/CodeGen/X86/2012-08-16-setcc.ll
@@ -6,8 +6,8 @@
define i32 @and_1(i8 zeroext %a, i8 zeroext %b, i32 %x) {
; CHECK-LABEL: and_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: testb %dil, %sil
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andl %esi, %eax
; CHECK-NEXT: cmovnel %edx, %eax
; CHECK-NEXT: retq
%1 = and i8 %b, %a
@@ -19,7 +19,7 @@ define i32 @and_1(i8 zeroext %a, i8 zeroext %b, i32 %x) {
define zeroext i1 @and_2(i8 zeroext %a, i8 zeroext %b) {
; CHECK-LABEL: and_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: testb %dil, %sil
+; CHECK-NEXT: testl %edi, %esi
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%1 = and i8 %b, %a
@@ -31,7 +31,7 @@ define i32 @xor_1(i8 zeroext %a, i8 zeroext %b, i32 %x) {
; CHECK-LABEL: xor_1:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: xorb %dil, %sil
+; CHECK-NEXT: xorl %esi, %edi
; CHECK-NEXT: cmovnel %edx, %eax
; CHECK-NEXT: retq
%1 = xor i8 %b, %a
@@ -43,7 +43,7 @@ define i32 @xor_1(i8 zeroext %a, i8 zeroext %b, i32 %x) {
define zeroext i1 @xor_2(i8 zeroext %a, i8 zeroext %b) {
; CHECK-LABEL: xor_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorb %dil, %sil
+; CHECK-NEXT: xorl %esi, %edi
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%1 = xor i8 %b, %a
diff --git a/llvm/test/CodeGen/X86/and-with-overflow.ll b/llvm/test/CodeGen/X86/and-with-overflow.ll
index f197ae1f2995..d83f91a4f0d2 100644
--- a/llvm/test/CodeGen/X86/and-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/and-with-overflow.ll
@@ -48,7 +48,6 @@ define i8 @and_i8_rr(i8 zeroext %0, i8 zeroext %1) {
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: andl %edi, %eax
-; X64-NEXT: testb %al, %al
; X64-NEXT: cmovel %edi, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
@@ -103,7 +102,6 @@ define i16 @and_i16_rr(i16 zeroext %0, i16 zeroext %1) {
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: andl %edi, %eax
-; X64-NEXT: testw %ax, %ax
; X64-NEXT: cmovel %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/jump_sign.ll b/llvm/test/CodeGen/X86/jump_sign.ll
index b2cfa72235fc..b436b1f35c8f 100644
--- a/llvm/test/CodeGen/X86/jump_sign.ll
+++ b/llvm/test/CodeGen/X86/jump_sign.ll
@@ -388,10 +388,11 @@ define i32 @func_test1(i32 %p1) nounwind uwtable {
; CHECK-LABEL: func_test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl b, %eax
+; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: setb %cl
; CHECK-NEXT: movl a, %eax
-; CHECK-NEXT: testb %al, %cl
+; CHECK-NEXT: testl %eax, %ecx
; CHECK-NEXT: je .LBB18_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: decl %eax
diff --git a/llvm/test/CodeGen/X86/or-with-overflow.ll b/llvm/test/CodeGen/X86/or-with-overflow.ll
index f9a519383fb7..3e39ab65ad2a 100644
--- a/llvm/test/CodeGen/X86/or-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/or-with-overflow.ll
@@ -48,7 +48,6 @@ define i8 @or_i8_rr(i8 zeroext %0, i8 zeroext %1) {
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: orl %edi, %eax
-; X64-NEXT: testb %al, %al
; X64-NEXT: cmovel %edi, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
@@ -103,7 +102,6 @@ define i16 @or_i16_rr(i16 zeroext %0, i16 zeroext %1) {
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: orl %edi, %eax
-; X64-NEXT: testw %ax, %ax
; X64-NEXT: cmovel %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/xor-with-overflow.ll b/llvm/test/CodeGen/X86/xor-with-overflow.ll
index cb48aa259c91..ad2da087929e 100644
--- a/llvm/test/CodeGen/X86/xor-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/xor-with-overflow.ll
@@ -48,7 +48,6 @@ define i8 @xor_i8_rr(i8 zeroext %0, i8 zeroext %1) {
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: xorl %edi, %eax
-; X64-NEXT: testb %al, %al
; X64-NEXT: cmovel %edi, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
@@ -103,7 +102,6 @@ define i16 @xor_i16_rr(i16 zeroext %0, i16 zeroext %1) {
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: xorl %edi, %eax
-; X64-NEXT: testw %ax, %ax
; X64-NEXT: cmovel %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
More information about the llvm-commits
mailing list