[llvm] 438ac28 - [X86] combineAddOrSubToADCOrSBB - Fold ADD/SUB + (AND(SRL(X,Y),1) -> ADC/SBB+BT(X,Y) (REAPPLIED)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 21 14:38:00 PDT 2022


Author: Simon Pilgrim
Date: 2022-03-21T21:37:42Z
New Revision: 438ac282db97c584daf2d4d1a90e6b3d49ef9189

URL: https://github.com/llvm/llvm-project/commit/438ac282db97c584daf2d4d1a90e6b3d49ef9189
DIFF: https://github.com/llvm/llvm-project/commit/438ac282db97c584daf2d4d1a90e6b3d49ef9189.diff

LOG: [X86] combineAddOrSubToADCOrSBB - Fold ADD/SUB + (AND(SRL(X,Y),1) -> ADC/SBB+BT(X,Y) (REAPPLIED)

As suggested on PR35908, if we are adding/subtracting an extracted bit, attempt to test the bit with BT instead, so that the add/sub can be folded into an ADC/SBB op.

Reapply with extra type legality checks - LowerAndToBT was originally only used during lowering; now that it can be called earlier, we might encounter illegal types, which we either promote to i32 or bail out on.

Differential Revision: https://reviews.llvm.org/D122084

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/add-sub-bool.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6e1c83f20c7af..a634574d68271 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23523,9 +23523,8 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
 
 /// Result of 'and' is compared against zero. Change to a BT node if possible.
 /// Returns the BT node and the condition code needed to use it.
-static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
-                            const SDLoc &dl, SelectionDAG &DAG,
-                            SDValue &X86CC) {
+static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl,
+                            SelectionDAG &DAG, X86::CondCode &X86CC) {
   assert(And.getOpcode() == ISD::AND && "Expected AND node!");
   SDValue Op0 = And.getOperand(0);
   SDValue Op1 = And.getOperand(1);
@@ -23587,9 +23586,13 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
   // that doing a bittest on the i32 value is ok.  We extend to i32 because
   // the encoding for the i16 version is larger than the i32 version.
   // Also promote i16 to i32 for performance / code size reason.
-  if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16)
+  if (Src.getValueType().getScalarSizeInBits() < 32)
     Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src);
 
+  // No legal type found, give up.
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType()))
+    return SDValue();
+
   // See if we can use the 32-bit instruction instead of the 64-bit one for a
   // shorter encoding. Since the former takes the modulo 32 of BitNo and the
   // latter takes the modulo 64, this is only valid if the 5th bit of BitNo is
@@ -23603,8 +23606,7 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
   if (Src.getValueType() != BitNo.getValueType())
     BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);
 
-  X86CC = DAG.getTargetConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B,
-                                dl, MVT::i8);
+  X86CC = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
   return DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo);
 }
 
@@ -24310,8 +24312,11 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
   // Lower ((X >>s N) & 1) != 0 to BT(X, N).
   if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && isNullConstant(Op1) &&
       (CC == ISD::SETEQ || CC == ISD::SETNE)) {
-    if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CC))
+    X86::CondCode X86CondCode;
+    if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CondCode)) {
+      X86CC = DAG.getTargetConstant(X86CondCode, dl, MVT::i8);
       return BT;
+    }
   }
 
   // Try to use PTEST/PMOVMSKB for a tree ORs equality compared with 0.
@@ -24783,9 +24788,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
     // We know the result of AND is compared against zero. Try to match
     // it to BT.
     if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
-      SDValue BTCC;
-      if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, DL, DAG, BTCC)) {
-        CC = BTCC;
+      X86::CondCode X86CondCode;
+      if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, DL, DAG, X86CondCode)) {
+        CC = DAG.getTargetConstant(X86CondCode, DL, MVT::i8);
         Cond = BT;
         AddTest = false;
       }
@@ -52294,6 +52299,7 @@ static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
 /// If this is an add or subtract where one operand is produced by a cmp+setcc,
 /// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
 /// with CMP+{ADC, SBB}.
+/// Also try (ADD/SUB)+(AND(SRL,1)) bit extraction pattern with BT+{ADC, SBB}.
 static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,
                                          SDValue X, SDValue Y,
                                          SelectionDAG &DAG) {
@@ -52304,11 +52310,20 @@ static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,
   if (Y.getOpcode() == ISD::ZERO_EXTEND && Y.hasOneUse())
     Y = Y.getOperand(0);
 
-  if (Y.getOpcode() != X86ISD::SETCC || !Y.hasOneUse())
+  if (!Y.hasOneUse())
     return SDValue();
 
-  X86::CondCode CC = (X86::CondCode)Y.getConstantOperandVal(0);
-  SDValue EFLAGS = Y.getOperand(1);
+  X86::CondCode CC;
+  SDValue EFLAGS;
+  if (Y.getOpcode() == X86ISD::SETCC) {
+    CC = (X86::CondCode)Y.getConstantOperandVal(0);
+    EFLAGS = Y.getOperand(1);
+  } else if (Y.getOpcode() == ISD::AND && isOneConstant(Y.getOperand(1))) {
+    EFLAGS = LowerAndToBT(Y, ISD::SETNE, DL, DAG, CC);
+  }
+
+  if (!EFLAGS)
+    return SDValue();
 
   // If X is -1 or 0, then we have an opportunity to avoid constants required in
   // the general case below.

diff  --git a/llvm/test/CodeGen/X86/add-sub-bool.ll b/llvm/test/CodeGen/X86/add-sub-bool.ll
index 10e6fb2f4dfd1..f2dfc05b9843d 100644
--- a/llvm/test/CodeGen/X86/add-sub-bool.ll
+++ b/llvm/test/CodeGen/X86/add-sub-bool.ll
@@ -18,31 +18,16 @@ define i32 @test_i32_add_add_idx(i32 %x, i32 %y, i32 %z) nounwind {
 ; X86-LABEL: test_i32_add_add_idx:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    shrl $30, %eax
-; X86-NEXT:    andl $1, %eax
-; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    btl $30, {{[0-9]+}}(%esp)
+; X86-NEXT:    adcl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
-; NOTBM-LABEL: test_i32_add_add_idx:
-; NOTBM:       # %bb.0:
-; NOTBM-NEXT:    # kill: def $esi killed $esi def $rsi
-; NOTBM-NEXT:    # kill: def $edi killed $edi def $rdi
-; NOTBM-NEXT:    leal (%rdi,%rsi), %eax
-; NOTBM-NEXT:    shrl $30, %edx
-; NOTBM-NEXT:    andl $1, %edx
-; NOTBM-NEXT:    addl %edx, %eax
-; NOTBM-NEXT:    retq
-;
-; TBM-LABEL: test_i32_add_add_idx:
-; TBM:       # %bb.0:
-; TBM-NEXT:    # kill: def $esi killed $esi def $rsi
-; TBM-NEXT:    # kill: def $edi killed $edi def $rdi
-; TBM-NEXT:    bextrl $286, %edx, %eax # imm = 0x11E
-; TBM-NEXT:    addl %edi, %eax
-; TBM-NEXT:    addl %esi, %eax
-; TBM-NEXT:    retq
+; X64-LABEL: test_i32_add_add_idx:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    btl $30, %edx
+; X64-NEXT:    adcl %esi, %eax
+; X64-NEXT:    retq
   %add = add i32 %y, %x
   %shift = lshr i32 %z, 30
   %mask = and i32 %shift, 1
@@ -54,31 +39,16 @@ define i32 @test_i32_add_add_commute_idx(i32 %x, i32 %y, i32 %z) nounwind {
 ; X86-LABEL: test_i32_add_add_commute_idx:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    shrl $2, %eax
-; X86-NEXT:    andl $1, %eax
-; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    btl $2, {{[0-9]+}}(%esp)
+; X86-NEXT:    adcl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
-; NOTBM-LABEL: test_i32_add_add_commute_idx:
-; NOTBM:       # %bb.0:
-; NOTBM-NEXT:    # kill: def $esi killed $esi def $rsi
-; NOTBM-NEXT:    # kill: def $edi killed $edi def $rdi
-; NOTBM-NEXT:    leal (%rdi,%rsi), %eax
-; NOTBM-NEXT:    shrl $2, %edx
-; NOTBM-NEXT:    andl $1, %edx
-; NOTBM-NEXT:    addl %edx, %eax
-; NOTBM-NEXT:    retq
-;
-; TBM-LABEL: test_i32_add_add_commute_idx:
-; TBM:       # %bb.0:
-; TBM-NEXT:    # kill: def $esi killed $esi def $rsi
-; TBM-NEXT:    # kill: def $edi killed $edi def $rdi
-; TBM-NEXT:    bextrl $258, %edx, %eax # imm = 0x102
-; TBM-NEXT:    addl %edi, %eax
-; TBM-NEXT:    addl %esi, %eax
-; TBM-NEXT:    retq
+; X64-LABEL: test_i32_add_add_commute_idx:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    btl $2, %edx
+; X64-NEXT:    adcl %esi, %eax
+; X64-NEXT:    retq
   %add = add i32 %y, %x
   %shift = lshr i32 %z, 2
   %mask = and i32 %shift, 1
@@ -110,6 +80,27 @@ define i32 @test_i32_add_add_idx0(i32 %x, i32 %y, i32 %z) nounwind {
   ret i32 %add1
 }
 
+define i24 @test_i24_add_add_idx(i24 %x, i24 %y, i24 %z) nounwind {
+; X86-LABEL: test_i24_add_add_idx:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    btl $15, {{[0-9]+}}(%esp)
+; X86-NEXT:    adcl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_i24_add_add_idx:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    btl $15, %edx
+; X64-NEXT:    adcl %esi, %eax
+; X64-NEXT:    retq
+  %add = add i24 %y, %x
+  %shift = lshr i24 %z, 15
+  %mask = and i24 %shift, 1
+  %add1 = add i24 %add, %mask
+  ret i24 %add1
+}
+
 define i128 @test_i128_add_add_idx(i128 %x, i128 %y, i128 %z) nounwind {
 ; X86-LABEL: test_i128_add_add_idx:
 ; X86:       # %bb.0:
@@ -198,29 +189,18 @@ define i32 @test_i32_add_sub_commute_idx(i32 %x, i32 %y, i32 %z) nounwind {
 ; X86-LABEL: test_i32_add_sub_commute_idx:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    shrl $8, %eax
-; X86-NEXT:    andl $1, %eax
-; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    btl $8, {{[0-9]+}}(%esp)
+; X86-NEXT:    adcl $0, %eax
 ; X86-NEXT:    retl
 ;
-; NOTBM-LABEL: test_i32_add_sub_commute_idx:
-; NOTBM:       # %bb.0:
-; NOTBM-NEXT:    # kill: def $edx killed $edx def $rdx
-; NOTBM-NEXT:    # kill: def $edi killed $edi def $rdi
-; NOTBM-NEXT:    subl %esi, %edi
-; NOTBM-NEXT:    shrl $8, %edx
-; NOTBM-NEXT:    andl $1, %edx
-; NOTBM-NEXT:    leal (%rdx,%rdi), %eax
-; NOTBM-NEXT:    retq
-;
-; TBM-LABEL: test_i32_add_sub_commute_idx:
-; TBM:       # %bb.0:
-; TBM-NEXT:    subl %esi, %edi
-; TBM-NEXT:    bextrl $264, %edx, %eax # imm = 0x108
-; TBM-NEXT:    addl %edi, %eax
-; TBM-NEXT:    retq
+; X64-LABEL: test_i32_add_sub_commute_idx:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    btl $8, %edx
+; X64-NEXT:    adcl $0, %eax
+; X64-NEXT:    retq
   %sub = sub i32 %x, %y
   %shift = lshr i32 %z, 8
   %mask = and i32 %shift, 1
@@ -231,32 +211,20 @@ define i32 @test_i32_add_sub_commute_idx(i32 %x, i32 %y, i32 %z) nounwind {
 define i32 @test_i32_sub_add_idx(i32 %x, i32 %y, i32 %z) nounwind {
 ; X86-LABEL: test_i32_sub_add_idx:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shrl %ecx
-; X86-NEXT:    andl $1, %ecx
-; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    btl $1, {{[0-9]+}}(%esp)
+; X86-NEXT:    sbbl $0, %eax
 ; X86-NEXT:    retl
 ;
-; NOTBM-LABEL: test_i32_sub_add_idx:
-; NOTBM:       # %bb.0:
-; NOTBM-NEXT:    # kill: def $esi killed $esi def $rsi
-; NOTBM-NEXT:    # kill: def $edi killed $edi def $rdi
-; NOTBM-NEXT:    leal (%rdi,%rsi), %eax
-; NOTBM-NEXT:    shrl %edx
-; NOTBM-NEXT:    andl $1, %edx
-; NOTBM-NEXT:    subl %edx, %eax
-; NOTBM-NEXT:    retq
-;
-; TBM-LABEL: test_i32_sub_add_idx:
-; TBM:       # %bb.0:
-; TBM-NEXT:    # kill: def $esi killed $esi def $rsi
-; TBM-NEXT:    # kill: def $edi killed $edi def $rdi
-; TBM-NEXT:    leal (%rdi,%rsi), %eax
-; TBM-NEXT:    bextrl $257, %edx, %ecx # imm = 0x101
-; TBM-NEXT:    subl %ecx, %eax
-; TBM-NEXT:    retq
+; X64-LABEL: test_i32_sub_add_idx:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal (%rdi,%rsi), %eax
+; X64-NEXT:    btl $1, %edx
+; X64-NEXT:    sbbl $0, %eax
+; X64-NEXT:    retq
   %add = add i32 %y, %x
   %shift = lshr i32 %z, 1
   %mask = and i32 %shift, 1
@@ -268,28 +236,18 @@ define i32 @test_i32_sub_sub_idx(i32 %x, i32 %y, i32 %z) nounwind {
 ; X86-LABEL: test_i32_sub_sub_idx:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shrl $16, %eax
-; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    btl $16, {{[0-9]+}}(%esp)
+; X86-NEXT:    adcl $0, %eax
 ; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
-; NOTBM-LABEL: test_i32_sub_sub_idx:
-; NOTBM:       # %bb.0:
-; NOTBM-NEXT:    # kill: def $edx killed $edx def $rdx
-; NOTBM-NEXT:    # kill: def $edi killed $edi def $rdi
-; NOTBM-NEXT:    shrl $16, %edx
-; NOTBM-NEXT:    andl $1, %edx
-; NOTBM-NEXT:    subl %esi, %edx
-; NOTBM-NEXT:    leal (%rdx,%rdi), %eax
-; NOTBM-NEXT:    retq
-;
-; TBM-LABEL: test_i32_sub_sub_idx:
-; TBM:       # %bb.0:
-; TBM-NEXT:    bextrl $272, %edx, %eax # imm = 0x110
-; TBM-NEXT:    subl %esi, %eax
-; TBM-NEXT:    addl %edi, %eax
-; TBM-NEXT:    retq
+; X64-LABEL: test_i32_sub_sub_idx:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    btl $16, %edx
+; X64-NEXT:    adcl $0, %eax
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    retq
   %shift = lshr i32 %z, 16
   %mask = and i32 %shift, 1
   %sub0 = sub i32 %y, %mask
@@ -301,29 +259,16 @@ define i32 @test_i32_sub_sub_commute_idx(i32 %x, i32 %y, i32 %z) nounwind {
 ; X86-LABEL: test_i32_sub_sub_commute_idx:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    shrl $15, %ecx
-; X86-NEXT:    andl $1, %ecx
-; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    btl $15, {{[0-9]+}}(%esp)
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
-; NOTBM-LABEL: test_i32_sub_sub_commute_idx:
-; NOTBM:       # %bb.0:
-; NOTBM-NEXT:    movl %edi, %eax
-; NOTBM-NEXT:    shrl $15, %edx
-; NOTBM-NEXT:    andl $1, %edx
-; NOTBM-NEXT:    subl %esi, %eax
-; NOTBM-NEXT:    subl %edx, %eax
-; NOTBM-NEXT:    retq
-;
-; TBM-LABEL: test_i32_sub_sub_commute_idx:
-; TBM:       # %bb.0:
-; TBM-NEXT:    movl %edi, %eax
-; TBM-NEXT:    bextrl $271, %edx, %ecx # imm = 0x10F
-; TBM-NEXT:    subl %esi, %eax
-; TBM-NEXT:    subl %ecx, %eax
-; TBM-NEXT:    retq
+; X64-LABEL: test_i32_sub_sub_commute_idx:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    btl $15, %edx
+; X64-NEXT:    sbbl %esi, %eax
+; X64-NEXT:    retq
   %shift = lshr i32 %z, 15
   %mask = and i32 %shift, 1
   %sub0 = sub i32 %x, %y
@@ -334,29 +279,22 @@ define i32 @test_i32_sub_sub_commute_idx(i32 %x, i32 %y, i32 %z) nounwind {
 define i32 @test_i32_sub_sum_idx(i32 %x, i32 %y, i32 %z) nounwind {
 ; X86-LABEL: test_i32_sub_sum_idx:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shrl $30, %eax
-; X86-NEXT:    andl $1, %eax
-; X86-NEXT:    addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    btl $30, {{[0-9]+}}(%esp)
+; X86-NEXT:    sbbl $0, %eax
+; X86-NEXT:    negl %eax
 ; X86-NEXT:    retl
 ;
-; NOTBM-LABEL: test_i32_sub_sum_idx:
-; NOTBM:       # %bb.0:
-; NOTBM-NEXT:    movl %edx, %eax
-; NOTBM-NEXT:    shrl $30, %eax
-; NOTBM-NEXT:    andl $1, %eax
-; NOTBM-NEXT:    addl %esi, %edi
-; NOTBM-NEXT:    subl %edi, %eax
-; NOTBM-NEXT:    retq
-;
-; TBM-LABEL: test_i32_sub_sum_idx:
-; TBM:       # %bb.0:
-; TBM-NEXT:    bextrl $286, %edx, %eax # imm = 0x11E
-; TBM-NEXT:    addl %esi, %edi
-; TBM-NEXT:    subl %edi, %eax
-; TBM-NEXT:    retq
+; X64-LABEL: test_i32_sub_sum_idx:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal (%rdi,%rsi), %eax
+; X64-NEXT:    btl $30, %edx
+; X64-NEXT:    sbbl $0, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    retq
   %shift = lshr i32 %z, 30
   %mask = and i32 %shift, 1
   %add = add i32 %y, %x
@@ -371,24 +309,18 @@ define i32 @test_i32_sub_sum_idx(i32 %x, i32 %y, i32 %z) nounwind {
 define i32 @test_i32_add_add_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
 ; X86-LABEL: test_i32_add_add_var:
 ; X86:       # %bb.0:
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    addl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    shrl %cl, %eax
-; X86-NEXT:    andl $1, %eax
-; X86-NEXT:    addl %edx, %eax
+; X86-NEXT:    btl %ecx, %edx
+; X86-NEXT:    adcl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_add_add_var:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    leal (%rdi,%rsi), %eax
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shrl %cl, %edx
-; X64-NEXT:    andl $1, %edx
-; X64-NEXT:    addl %edx, %eax
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    btl %ecx, %edx
+; X64-NEXT:    adcl %esi, %eax
 ; X64-NEXT:    retq
   %add = add i32 %y, %x
   %shift = lshr i32 %z, %w
@@ -400,24 +332,18 @@ define i32 @test_i32_add_add_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
 define i32 @test_i32_add_add_commute_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
 ; X86-LABEL: test_i32_add_add_commute_var:
 ; X86:       # %bb.0:
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    addl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    shrl %cl, %eax
-; X86-NEXT:    andl $1, %eax
-; X86-NEXT:    addl %edx, %eax
+; X86-NEXT:    btl %ecx, %edx
+; X86-NEXT:    adcl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_add_add_commute_var:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    leal (%rdi,%rsi), %eax
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shrl %cl, %edx
-; X64-NEXT:    andl $1, %edx
-; X64-NEXT:    addl %edx, %eax
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    btl %ecx, %edx
+; X64-NEXT:    adcl %esi, %eax
 ; X64-NEXT:    retq
   %add = add i32 %y, %x
   %shift = lshr i32 %z, %w
@@ -443,10 +369,10 @@ define i64 @test_i64_add_add_var(i64 %x, i64 %y, i64 %z, i64 %w) nounwind {
 ; X86-NEXT:    shrl %cl, %edi
 ; X86-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-NEXT:    testb $32, %cl
-; X86-NEXT:    jne .LBB12_2
+; X86-NEXT:    jne .LBB13_2
 ; X86-NEXT:  # %bb.1:
 ; X86-NEXT:    movl %esi, %edi
-; X86-NEXT:  .LBB12_2:
+; X86-NEXT:  .LBB13_2:
 ; X86-NEXT:    andl $1, %edi
 ; X86-NEXT:    addl %edi, %eax
 ; X86-NEXT:    adcl $0, %edx
@@ -457,11 +383,9 @@ define i64 @test_i64_add_add_var(i64 %x, i64 %y, i64 %z, i64 %w) nounwind {
 ;
 ; X64-LABEL: test_i64_add_add_var:
 ; X64:       # %bb.0:
-; X64-NEXT:    leaq (%rdi,%rsi), %rax
-; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NEXT:    shrq %cl, %rdx
-; X64-NEXT:    andl $1, %edx
-; X64-NEXT:    addq %rdx, %rax
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    btq %rcx, %rdx
+; X64-NEXT:    adcq %rsi, %rax
 ; X64-NEXT:    retq
   %add = add i64 %y, %x
   %shift = lshr i64 %z, %w
@@ -473,24 +397,20 @@ define i64 @test_i64_add_add_var(i64 %x, i64 %y, i64 %z, i64 %w) nounwind {
 define i32 @test_i32_add_sub_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
 ; X86-LABEL: test_i32_add_sub_var:
 ; X86:       # %bb.0:
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    shrl %cl, %eax
-; X86-NEXT:    andl $1, %eax
-; X86-NEXT:    addl %edx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    btl %ecx, %edx
+; X86-NEXT:    adcl $0, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_add_sub_var:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edx killed $edx def $rdx
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    subl %esi, %edi
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shrl %cl, %edx
-; X64-NEXT:    andl $1, %edx
-; X64-NEXT:    leal (%rdx,%rdi), %eax
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    btl %ecx, %edx
+; X64-NEXT:    adcl $0, %eax
 ; X64-NEXT:    retq
   %sub = sub i32 %x, %y
   %shift = lshr i32 %z, %w
@@ -502,24 +422,20 @@ define i32 @test_i32_add_sub_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
 define i32 @test_i32_add_sub_commute_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
 ; X86-LABEL: test_i32_add_sub_commute_var:
 ; X86:       # %bb.0:
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    shrl %cl, %eax
-; X86-NEXT:    andl $1, %eax
-; X86-NEXT:    addl %edx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    btl %ecx, %edx
+; X86-NEXT:    adcl $0, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_add_sub_commute_var:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edx killed $edx def $rdx
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    subl %esi, %edi
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shrl %cl, %edx
-; X64-NEXT:    andl $1, %edx
-; X64-NEXT:    leal (%rdx,%rdi), %eax
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    btl %ecx, %edx
+; X64-NEXT:    adcl $0, %eax
 ; X64-NEXT:    retq
   %sub = sub i32 %x, %y
   %shift = lshr i32 %z, %w
@@ -531,13 +447,12 @@ define i32 @test_i32_add_sub_commute_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwin
 define i32 @test_i32_sub_add_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
 ; X86-LABEL: test_i32_sub_add_var:
 ; X86:       # %bb.0:
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shrl %cl, %edx
-; X86-NEXT:    andl $1, %edx
-; X86-NEXT:    subl %edx, %eax
+; X86-NEXT:    btl %ecx, %edx
+; X86-NEXT:    sbbl $0, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_sub_add_var:
@@ -545,10 +460,8 @@ define i32 @test_i32_sub_add_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
 ; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    leal (%rdi,%rsi), %eax
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shrl %cl, %edx
-; X64-NEXT:    andl $1, %edx
-; X64-NEXT:    subl %edx, %eax
+; X64-NEXT:    btl %ecx, %edx
+; X64-NEXT:    sbbl $0, %eax
 ; X64-NEXT:    retq
   %add = add i32 %y, %x
   %shift = lshr i32 %z, %w
@@ -560,23 +473,20 @@ define i32 @test_i32_sub_add_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
 define i32 @test_i32_sub_sub_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
 ; X86-LABEL: test_i32_sub_sub_var:
 ; X86:       # %bb.0:
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shrl %cl, %eax
-; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    btl %ecx, %edx
+; X86-NEXT:    adcl $0, %eax
 ; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_sub_sub_var:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edx killed $edx def $rdx
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shrl %cl, %edx
-; X64-NEXT:    andl $1, %edx
-; X64-NEXT:    subl %esi, %edx
-; X64-NEXT:    leal (%rdx,%rdi), %eax
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    btl %ecx, %edx
+; X64-NEXT:    adcl $0, %eax
+; X64-NEXT:    subl %esi, %eax
 ; X64-NEXT:    retq
   %shift = lshr i32 %z, %w
   %mask = and i32 %shift, 1
@@ -589,22 +499,17 @@ define i32 @test_i32_sub_sub_commute_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwin
 ; X86-LABEL: test_i32_sub_sub_commute_var:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    shrl %cl, %edx
-; X86-NEXT:    andl $1, %edx
-; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    subl %edx, %eax
+; X86-NEXT:    btl %ecx, %edx
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_sub_sub_commute_var:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shrl %cl, %edx
-; X64-NEXT:    andl $1, %edx
-; X64-NEXT:    subl %esi, %eax
-; X64-NEXT:    subl %edx, %eax
+; X64-NEXT:    btl %ecx, %edx
+; X64-NEXT:    sbbl %esi, %eax
 ; X64-NEXT:    retq
   %shift = lshr i32 %z, %w
   %mask = and i32 %shift, 1
@@ -616,23 +521,23 @@ define i32 @test_i32_sub_sub_commute_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwin
 define i32 @test_i32_sub_sum_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
 ; X86-LABEL: test_i32_sub_sum_var:
 ; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shrl %cl, %eax
-; X86-NEXT:    andl $1, %eax
-; X86-NEXT:    addl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    subl %edx, %eax
+; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    btl %ecx, %edx
+; X86-NEXT:    sbbl $0, %eax
+; X86-NEXT:    negl %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i32_sub_sum_var:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edx, %eax
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shrl %cl, %eax
-; X64-NEXT:    andl $1, %eax
-; X64-NEXT:    addl %esi, %edi
-; X64-NEXT:    subl %edi, %eax
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal (%rdi,%rsi), %eax
+; X64-NEXT:    btl %ecx, %edx
+; X64-NEXT:    sbbl $0, %eax
+; X64-NEXT:    negl %eax
 ; X64-NEXT:    retq
   %shift = lshr i32 %z, %w
   %mask = and i32 %shift, 1


        


More information about the llvm-commits mailing list