[llvm] 68defc0 - [x86] make select lowering using SBB hack more flexible

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 7 10:24:54 PST 2022


Author: Sanjay Patel
Date: 2022-01-07T13:23:09-05:00
New Revision: 68defc013444f21a46757b64e9144bb218c9e7eb

URL: https://github.com/llvm/llvm-project/commit/68defc013444f21a46757b64e9144bb218c9e7eb
DIFF: https://github.com/llvm/llvm-project/commit/68defc013444f21a46757b64e9144bb218c9e7eb.diff

LOG: [x86] make select lowering using SBB hack more flexible

select (X != 0), -1, Y --> 0 - X; or (sbb), Y
select (X != 0), Y, -1 --> X - 1; or (sbb), Y

We already had these x86 carry-flag transforms, but one was over-specified to
handle a "0" select arm only. That's just a special-case of the more general
pattern (the 'or' will be deleted if Y is zero).

This is part of solving #53006, but it misses that example because some other
combine has already converted that exact pattern into math ops.

Differential Revision: https://reviews.llvm.org/D116765

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/sdiv_fix_sat.ll
    llvm/test/CodeGen/X86/select.ll
    llvm/test/CodeGen/X86/umul_fix_sat.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e2351c765e981..e6f929ed3e8f3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24547,27 +24547,24 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
       SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
       SDVTList CmpVTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
 
-      // Apply further optimizations for special cases
-      // (select (x != 0), -1, 0) -> neg & sbb
-      // (select (x == 0), 0, -1) -> neg & sbb
-      if (isNullConstant(Y) &&
-          (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) {
+      // 'X - 1' sets the carry flag if X == 0.
+      // '0 - X' sets the carry flag if X != 0.
+      // Convert the carry flag to a -1/0 mask with sbb:
+      // select (X != 0), -1, Y --> 0 - X; or (sbb), Y
+      // select (X == 0), Y, -1 --> 0 - X; or (sbb), Y
+      // select (X != 0), Y, -1 --> X - 1; or (sbb), Y
+      // select (X == 0), -1, Y --> X - 1; or (sbb), Y
+      if (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE)) {
         SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType());
-        SDValue Neg = DAG.getNode(X86ISD::SUB, DL, CmpVTs, Zero, CmpOp0);
-        Zero = DAG.getConstant(0, DL, Op.getValueType());
-        return DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Neg.getValue(1));
+        Cmp = DAG.getNode(X86ISD::SUB, DL, CmpVTs, Zero, CmpOp0);
+      } else {
+        SDValue One = DAG.getConstant(1, DL, CmpOp0.getValueType());
+        Cmp = DAG.getNode(X86ISD::SUB, DL, CmpVTs, CmpOp0, One);
       }
-
-      Cmp = DAG.getNode(X86ISD::SUB, DL, CmpVTs,
-                        CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType()));
-
+      // TODO: We don't need "0 - 0" here. This should use X86ISD::SETCC_CARRY.
       SDValue Zero = DAG.getConstant(0, DL, Op.getValueType());
       SDValue Res =   // Res = 0 or -1.
         DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp.getValue(1));
-
-      if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E))
-        Res = DAG.getNOT(DL, Res, Res.getValueType());
-
       return DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
     } else if (!Subtarget.hasCMov() && CondCode == X86::COND_E &&
                Cmp.getOperand(0).getOpcode() == ISD::AND &&

diff  --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
index 44acdc7e676a5..9b964b147d553 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -1157,6 +1157,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    testl %edx, %edx
 ; X86-NEXT:    movl $0, %eax
 ; X86-NEXT:    cmovsl %edx, %eax
@@ -1168,11 +1169,11 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    sarl $31, %ecx
 ; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %ecx, %esi
-; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
 ; X86-NEXT:    testl %eax, %eax
-; X86-NEXT:    cmovel %eax, %esi
-; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmovel %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl $0, %ecx
 ; X86-NEXT:    cmovsl %eax, %ecx
 ; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -1193,140 +1194,134 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    movl $-1, %eax
 ; X86-NEXT:    cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, %ebx
 ; X86-NEXT:    sarl $31, %ebx
 ; X86-NEXT:    movl %ebx, %eax
 ; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT:    testl %esi, %esi
-; X86-NEXT:    cmovel %esi, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl $0, %ecx
-; X86-NEXT:    cmovsl %esi, %ecx
-; X86-NEXT:    movl $-1, %eax
-; X86-NEXT:    cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %edx, %eax
-; X86-NEXT:    sarl $31, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT:    testl %edx, %edx
-; X86-NEXT:    cmovel %edx, %eax
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    cmovel %ecx, %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    andl %eax, %ebx
-; X86-NEXT:    cmpl $1, %eax
-; X86-NEXT:    movl $0, %edx
-; X86-NEXT:    sbbl %edx, %edx
-; X86-NEXT:    notl %edx
-; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT:    cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT:    movl $0, %esi
-; X86-NEXT:    cmovel %esi, %ebx
-; X86-NEXT:    cmpl $-1, %ebx
 ; X86-NEXT:    movl $0, %eax
-; X86-NEXT:    cmovel %edx, %eax
-; X86-NEXT:    testl %ecx, %ecx
-; X86-NEXT:    cmovsl %esi, %edx
+; X86-NEXT:    cmovsl %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, %edx
 ; X86-NEXT:    movl $-1, %esi
-; X86-NEXT:    cmovsl %esi, %ebx
+; X86-NEXT:    cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    sarl $31, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT:    cmpl $-1, %ecx
-; X86-NEXT:    cmovnel %ebx, %ecx
-; X86-NEXT:    cmovel %eax, %edx
-; X86-NEXT:    shrdl $1, %ecx, %edx
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    cmovel %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    andl %eax, %edi
-; X86-NEXT:    cmpl $1, %eax
+; X86-NEXT:    andl %eax, %ebx
+; X86-NEXT:    negl %eax
 ; X86-NEXT:    movl $0, %ecx
 ; X86-NEXT:    sbbl %ecx, %ecx
-; X86-NEXT:    notl %ecx
 ; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    cmovnel %esi, %ecx
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    cmovel %edx, %ebx
+; X86-NEXT:    cmpl $-1, %ebx
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    cmovel %ecx, %esi
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    cmovsl %edx, %ecx
+; X86-NEXT:    movl $-1, %edx
+; X86-NEXT:    cmovsl %edx, %ebx
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    cmpl $-1, %eax
+; X86-NEXT:    cmovel %esi, %ecx
+; X86-NEXT:    cmovnel %ebx, %eax
+; X86-NEXT:    shldl $31, %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    andl %eax, %edi
+; X86-NEXT:    negl %eax
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:    sbbl %eax, %eax
 ; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT:    cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
 ; X86-NEXT:    movl $0, %esi
 ; X86-NEXT:    cmovel %esi, %edi
 ; X86-NEXT:    cmpl $-1, %edi
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    cmovel %eax, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    cmovsl %esi, %eax
+; X86-NEXT:    movl $0, %esi
+; X86-NEXT:    movl $-1, %ebx
+; X86-NEXT:    cmovsl %ebx, %edi
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    cmpl $-1, %ecx
+; X86-NEXT:    cmovel %edx, %eax
+; X86-NEXT:    cmovnel %edi, %ecx
+; X86-NEXT:    shldl $31, %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    andl %eax, %edx
+; X86-NEXT:    negl %eax
 ; X86-NEXT:    movl $0, %eax
-; X86-NEXT:    cmovel %ecx, %eax
+; X86-NEXT:    sbbl %eax, %eax
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    cmovel %esi, %edx
+; X86-NEXT:    cmpl $-1, %edx
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    cmovel %eax, %ecx
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X86-NEXT:    testl %ebx, %ebx
-; X86-NEXT:    cmovsl %esi, %ecx
-; X86-NEXT:    movl $-1, %esi
-; X86-NEXT:    cmovsl %esi, %edi
+; X86-NEXT:    cmovsl %esi, %eax
+; X86-NEXT:    movl $-1, %edi
+; X86-NEXT:    cmovsl %edi, %edx
 ; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
 ; X86-NEXT:    cmpl $-1, %ebx
-; X86-NEXT:    cmovnel %edi, %ebx
-; X86-NEXT:    cmovel %eax, %ecx
-; X86-NEXT:    shrdl $1, %ebx, %ecx
+; X86-NEXT:    cmovel %ecx, %eax
+; X86-NEXT:    cmovnel %edx, %ebx
+; X86-NEXT:    shldl $31, %eax, %ebx
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; X86-NEXT:    andl %eax, %edi
-; X86-NEXT:    cmpl $1, %eax
-; X86-NEXT:    movl $0, %esi
-; X86-NEXT:    sbbl %esi, %esi
-; X86-NEXT:    notl %esi
-; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT:    cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT:    movl %edi, %eax
-; X86-NEXT:    movl $0, %edi
-; X86-NEXT:    cmovel %edi, %eax
-; X86-NEXT:    cmpl $-1, %eax
-; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    negl %eax
 ; X86-NEXT:    movl $0, %eax
-; X86-NEXT:    cmovel %esi, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT:    testl %ebx, %ebx
-; X86-NEXT:    movl $0, %eax
-; X86-NEXT:    cmovsl %eax, %esi
-; X86-NEXT:    movl $-1, %eax
-; X86-NEXT:    cmovsl %eax, %edi
-; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT:    cmpl $-1, %ebx
-; X86-NEXT:    cmovnel %edi, %ebx
-; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT:    shrdl $1, %ebx, %esi
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT:    andl %eax, %ebx
-; X86-NEXT:    cmpl $1, %eax
-; X86-NEXT:    movl $0, %edi
-; X86-NEXT:    sbbl %edi, %edi
-; X86-NEXT:    notl %edi
-; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %eax, %eax
 ; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT:    cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT:    movl %ebx, %eax
-; X86-NEXT:    movl $0, %ebx
-; X86-NEXT:    cmovel %ebx, %eax
-; X86-NEXT:    cmpl $-1, %eax
-; X86-NEXT:    movl $0, %ebx
-; X86-NEXT:    cmovel %edi, %ebx
-; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT:    movl $0, %ebx
-; X86-NEXT:    cmovsl %ebx, %edi
-; X86-NEXT:    movl $-1, %ebx
-; X86-NEXT:    cmovsl %ebx, %eax
-; X86-NEXT:    movl %eax, %ebx
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT:    cmpl $-1, %eax
-; X86-NEXT:    cmovnel %ebx, %eax
-; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT:    shrdl $1, %eax, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    cmovel %esi, %edi
+; X86-NEXT:    cmpl $-1, %edi
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    cmovel %eax, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    cmovsl %esi, %eax
+; X86-NEXT:    movl $-1, %esi
+; X86-NEXT:    cmovsl %esi, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    andl %edx, %esi
+; X86-NEXT:    cmpl $-1, %esi
+; X86-NEXT:    cmovel %ecx, %eax
+; X86-NEXT:    cmovnel %edi, %esi
+; X86-NEXT:    shldl $31, %eax, %esi
 ; X86-NEXT:    movl 8(%ebp), %eax
-; X86-NEXT:    movl %edi, 12(%eax)
-; X86-NEXT:    movl %esi, 8(%eax)
+; X86-NEXT:    movl %esi, 12(%eax)
+; X86-NEXT:    movl %ebx, 8(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X86-NEXT:    movl %ecx, 4(%eax)
-; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, (%eax)
 ; X86-NEXT:    leal -12(%ebp), %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi

diff  --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index 5448c46867e08..62e3403392f7e 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -793,9 +793,8 @@ define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
 ; CHECK-LABEL: test11:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpq $1, %rdi
+; CHECK-NEXT:    negq %rdi
 ; CHECK-NEXT:    sbbq %rax, %rax
-; CHECK-NEXT:    notq %rax
 ; CHECK-NEXT:    orq %rsi, %rax
 ; CHECK-NEXT:    retq
 ;
@@ -833,9 +832,8 @@ define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
 ; CHECK-LABEL: test11a:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpq $1, %rdi
+; CHECK-NEXT:    negq %rdi
 ; CHECK-NEXT:    sbbq %rax, %rax
-; CHECK-NEXT:    notq %rax
 ; CHECK-NEXT:    orq %rsi, %rax
 ; CHECK-NEXT:    retq
 ;
@@ -872,27 +870,24 @@ define i32 @eqzero_const_or_all_ones(i32 %x) {
 ; CHECK-LABEL: eqzero_const_or_all_ones:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpl $1, %edi
+; CHECK-NEXT:    negl %edi
 ; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    notl %eax
 ; CHECK-NEXT:    orl $42, %eax
 ; CHECK-NEXT:    retq
 ;
 ; ATHLON-LABEL: eqzero_const_or_all_ones:
 ; ATHLON:       ## %bb.0:
 ; ATHLON-NEXT:    xorl %eax, %eax
-; ATHLON-NEXT:    cmpl $1, {{[0-9]+}}(%esp)
+; ATHLON-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
 ; ATHLON-NEXT:    sbbl %eax, %eax
-; ATHLON-NEXT:    notl %eax
 ; ATHLON-NEXT:    orl $42, %eax
 ; ATHLON-NEXT:    retl
 ;
 ; MCU-LABEL: eqzero_const_or_all_ones:
 ; MCU:       # %bb.0:
 ; MCU-NEXT:    xorl %ecx, %ecx
-; MCU-NEXT:    cmpl $1, %eax
+; MCU-NEXT:    negl %eax
 ; MCU-NEXT:    sbbl %ecx, %ecx
-; MCU-NEXT:    notl %ecx
 ; MCU-NEXT:    orl $42, %ecx
 ; MCU-NEXT:    movl %ecx, %eax
 ; MCU-NEXT:    retl
@@ -971,9 +966,8 @@ define i8 @nezero_all_ones_or_const(i8 %x) {
 ; CHECK-LABEL: nezero_all_ones_or_const:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpb $1, %dil
+; CHECK-NEXT:    negb %dil
 ; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    notb %al
 ; CHECK-NEXT:    orb $42, %al
 ; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
@@ -981,22 +975,19 @@ define i8 @nezero_all_ones_or_const(i8 %x) {
 ; ATHLON-LABEL: nezero_all_ones_or_const:
 ; ATHLON:       ## %bb.0:
 ; ATHLON-NEXT:    xorl %eax, %eax
-; ATHLON-NEXT:    cmpb $1, {{[0-9]+}}(%esp)
+; ATHLON-NEXT:    cmpb {{[0-9]+}}(%esp), %al
 ; ATHLON-NEXT:    sbbl %eax, %eax
-; ATHLON-NEXT:    notb %al
 ; ATHLON-NEXT:    orb $42, %al
 ; ATHLON-NEXT:    ## kill: def $al killed $al killed $eax
 ; ATHLON-NEXT:    retl
 ;
 ; MCU-LABEL: nezero_all_ones_or_const:
 ; MCU:       # %bb.0:
-; MCU-NEXT:    movl %eax, %ecx
-; MCU-NEXT:    xorl %eax, %eax
-; MCU-NEXT:    cmpb $1, %cl
-; MCU-NEXT:    sbbl %eax, %eax
-; MCU-NEXT:    notb %al
-; MCU-NEXT:    orb $42, %al
-; MCU-NEXT:    # kill: def $al killed $al killed $eax
+; MCU-NEXT:    xorl %ecx, %ecx
+; MCU-NEXT:    negb %al
+; MCU-NEXT:    sbbl %ecx, %ecx
+; MCU-NEXT:    orb $42, %cl
+; MCU-NEXT:    movl %ecx, %eax
 ; MCU-NEXT:    retl
   %z = icmp ne i8 %x, 0
   %r = select i1 %z, i8 -1, i8 42

diff  --git a/llvm/test/CodeGen/X86/umul_fix_sat.ll b/llvm/test/CodeGen/X86/umul_fix_sat.ll
index 155dab5b47373..504557242c305 100644
--- a/llvm/test/CodeGen/X86/umul_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/umul_fix_sat.ll
@@ -443,32 +443,31 @@ define i64 @func7(i64 %x, i64 %y) nounwind {
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl %esi, %eax
 ; X86-NEXT:    mull %ebp
 ; X86-NEXT:    movl %edx, %ecx
-; X86-NEXT:    movl %eax, %esi
-; X86-NEXT:    movl %ebx, %eax
-; X86-NEXT:    mull %edi
-; X86-NEXT:    addl %edx, %esi
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    addl %edx, %edi
 ; X86-NEXT:    adcl $0, %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    mull %ebp
-; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %edx, %esi
 ; X86-NEXT:    movl %eax, %ebp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    mull %edi
-; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    addl %edi, %eax
 ; X86-NEXT:    adcl %ecx, %edx
-; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    adcl $0, %esi
 ; X86-NEXT:    addl %ebp, %edx
-; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    adcl $0, %esi
 ; X86-NEXT:    xorl %ecx, %ecx
-; X86-NEXT:    cmpl $1, %ebx
+; X86-NEXT:    negl %esi
 ; X86-NEXT:    sbbl %ecx, %ecx
-; X86-NEXT:    notl %ecx
 ; X86-NEXT:    orl %ecx, %eax
 ; X86-NEXT:    orl %ecx, %edx
 ; X86-NEXT:    popl %esi
@@ -524,9 +523,8 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
 ; X86-NEXT:    movl %edx, %esi
 ; X86-NEXT:    shrl $31, %esi
 ; X86-NEXT:    xorl %edi, %edi
-; X86-NEXT:    cmpl $1, %esi
+; X86-NEXT:    negl %esi
 ; X86-NEXT:    sbbl %edi, %edi
-; X86-NEXT:    notl %edi
 ; X86-NEXT:    orl %edi, %eax
 ; X86-NEXT:    shrdl $31, %ecx, %edx
 ; X86-NEXT:    orl %edi, %edx


        


More information about the llvm-commits mailing list