[llvm] a942a94 - [X86] Improve (select carry C1+1 C1)

Kazu Hirata via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 20 16:38:27 PST 2023


Author: Kazu Hirata
Date: 2023-02-20T16:38:21-08:00
New Revision: a942a944245374fc62a5af8ee3abbc579f5ee7a5

URL: https://github.com/llvm/llvm-project/commit/a942a944245374fc62a5af8ee3abbc579f5ee7a5
DIFF: https://github.com/llvm/llvm-project/commit/a942a944245374fc62a5af8ee3abbc579f5ee7a5.diff

LOG: [X86] Improve (select carry C1+1 C1)

Without this patch:

  return X < 4 ? 3 : 2;

  return X < 9 ? 7 : 6;

are compiled as:

  31 c0                   xor    %eax,%eax
  83 ff 04                cmp    $0x4,%edi
  0f 93 c0                setae  %al
  83 f0 03                xor    $0x3,%eax

  31 c0                   xor    %eax,%eax
  83 ff 09                cmp    $0x9,%edi
  0f 92 c0                setb   %al
  83 c8 06                or     $0x6,%eax

respectively.  With this patch, we generate:

  31 c0                   xor    %eax,%eax
  83 ff 04                cmp    $0x4,%edi
  83 d0 02                adc    $0x2,%eax

  31 c0                   xor    %eax,%eax
  83 ff 09                cmp    $0x9,%edi
  83 d0 06                adc    $0x6,%eax

respectively, saving 3 bytes while reducing the tree height.

This patch recognizes the equivalence of OR and ADD
(if bits do not overlap) and delegates to combineAddOrSubToADCOrSBB
for further processing.  The same applies to the equivalence of XOR
and SUB.

Differential Revision: https://reviews.llvm.org/D143838

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/select_const.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 03736ac0234d0..8d8f186b6baa8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -50108,6 +50108,34 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
   return SDValue();
 }
 
+static SDValue combineOrXorWithSETCC(SDNode *N, SDValue N0, SDValue N1,
+                                     SelectionDAG &DAG) {
+  assert((N->getOpcode() == ISD::XOR || N->getOpcode() == ISD::OR) &&
+         "Unexpected opcode");
+
+  // Delegate to combineAddOrSubToADCOrSBB if we have:
+  //
+  //   (xor/or (zero_extend (setcc)) imm)
+  //
+  // where imm is odd if and only if we have xor, in which case the XOR/OR are
+  // equivalent to a SUB/ADD, respectively.
+  if (N0.getOpcode() == ISD::ZERO_EXTEND &&
+      N0.getOperand(0).getOpcode() == X86ISD::SETCC && N0.hasOneUse()) {
+    if (auto *N1C = dyn_cast<ConstantSDNode>(N1)) {
+      bool IsSub = N->getOpcode() == ISD::XOR;
+      bool N1COdd = N1C->getZExtValue() & 1;
+      if (IsSub ? N1COdd : !N1COdd) {
+        SDLoc DL(N);
+        EVT VT = N->getValueType(0);
+        if (SDValue R = combineAddOrSubToADCOrSBB(IsSub, DL, VT, N1, N0, DAG))
+          return R;
+      }
+    }
+  }
+
+  return SDValue();
+}
+
 static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
                          TargetLowering::DAGCombinerInfo &DCI,
                          const X86Subtarget &Subtarget) {
@@ -50255,6 +50283,9 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
     if (SDValue R = foldMaskedMerge(N, DAG))
       return R;
 
+  if (SDValue R = combineOrXorWithSETCC(N, N0, N1, DAG))
+    return R;
+
   return SDValue();
 }
 
@@ -52730,6 +52761,9 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
   if (SDValue SetCC = foldXor1SetCC(N, DAG))
     return SetCC;
 
+  if (SDValue R = combineOrXorWithSETCC(N, N0, N1, DAG))
+    return R;
+
   if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG))
     return RV;
 

diff  --git a/llvm/test/CodeGen/X86/select_const.ll b/llvm/test/CodeGen/X86/select_const.ll
index 431560b642b51..d35cd65a5db91 100644
--- a/llvm/test/CodeGen/X86/select_const.ll
+++ b/llvm/test/CodeGen/X86/select_const.ll
@@ -525,9 +525,8 @@ define i32 @select_eq0_3_2(i32 %X) {
 ; CHECK-LABEL: select_eq0_3_2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    orl $2, %eax
+; CHECK-NEXT:    cmpl $1, %edi
+; CHECK-NEXT:    adcl $2, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp eq i32 %X, 0
   %sel = select i1 %cmp, i32 3, i32 2
@@ -539,8 +538,7 @@ define i32 @select_ugt3_2_3(i32 %X) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $4, %edi
-; CHECK-NEXT:    setae %al
-; CHECK-NEXT:    xorl $3, %eax
+; CHECK-NEXT:    adcl $2, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp ugt i32 %X, 3
   %sel = select i1 %cmp, i32 2, i32 3
@@ -552,8 +550,7 @@ define i32 @select_ult9_7_6(i32 %X) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $9, %edi
-; CHECK-NEXT:    setb %al
-; CHECK-NEXT:    orl $6, %eax
+; CHECK-NEXT:    adcl $6, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp ult i32 %X, 9
   %sel = select i1 %cmp, i32 7, i32 6
@@ -563,23 +560,21 @@ define i32 @select_ult9_7_6(i32 %X) {
 define i32 @select_ult2_2_3(i32 %X) {
 ; CHECK-LABEL: select_ult2_2_3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $2, %edi
-; CHECK-NEXT:    setb %al
-; CHECK-NEXT:    xorl $3, %eax
+; CHECK-NEXT:    movl $3, %eax
+; CHECK-NEXT:    sbbl $0, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp ult i32 %X, 2
   %cond = select i1 %cmp, i32 2, i32 3
   ret i32 %cond
 }
 
-define i32 @select_ugt2_3_2(i32 %X) {
-; CHECK-LABEL: select_ugt2_3_2:
+define i32 @select_ugt3_3_2(i32 %X) {
+; CHECK-LABEL: select_ugt3_3_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $4, %edi
-; CHECK-NEXT:    setae %al
-; CHECK-NEXT:    orl $2, %eax
+; CHECK-NEXT:    movl $2, %eax
+; CHECK-NEXT:    sbbl $-1, %eax
 ; CHECK-NEXT:    retq
   %cmp.inv = icmp ugt i32 %X, 3
   %cond = select i1 %cmp.inv, i32 3, i32 2


        


More information about the llvm-commits mailing list