[llvm] 438ac28 - [X86] combineAddOrSubToADCOrSBB - Fold ADD/SUB + (AND(SRL(X,Y),1)) -> ADC/SBB+BT(X,Y) (REAPPLIED)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 21 14:38:00 PDT 2022
Author: Simon Pilgrim
Date: 2022-03-21T21:37:42Z
New Revision: 438ac282db97c584daf2d4d1a90e6b3d49ef9189
URL: https://github.com/llvm/llvm-project/commit/438ac282db97c584daf2d4d1a90e6b3d49ef9189
DIFF: https://github.com/llvm/llvm-project/commit/438ac282db97c584daf2d4d1a90e6b3d49ef9189.diff
LOG: [X86] combineAddOrSubToADCOrSBB - Fold ADD/SUB + (AND(SRL(X,Y),1)) -> ADC/SBB+BT(X,Y) (REAPPLIED)
As suggested on PR35908, if we are adding/subtracting an extracted bit, attempt to use BT instead to fold the op and use an ADC/SBB op.
Reapply with extra type legality checks - LowerAndToBT was originally only used during lowering; now that it can occur earlier, we might encounter illegal types that we can either promote to i32 or just bail on.
Differential Revision: https://reviews.llvm.org/D122084
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/add-sub-bool.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6e1c83f20c7af..a634574d68271 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23523,9 +23523,8 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
/// Result of 'and' is compared against zero. Change to a BT node if possible.
/// Returns the BT node and the condition code needed to use it.
-static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
- const SDLoc &dl, SelectionDAG &DAG,
- SDValue &X86CC) {
+static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl,
+ SelectionDAG &DAG, X86::CondCode &X86CC) {
assert(And.getOpcode() == ISD::AND && "Expected AND node!");
SDValue Op0 = And.getOperand(0);
SDValue Op1 = And.getOperand(1);
@@ -23587,9 +23586,13 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
// that doing a bittest on the i32 value is ok. We extend to i32 because
// the encoding for the i16 version is larger than the i32 version.
// Also promote i16 to i32 for performance / code size reason.
- if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16)
+ if (Src.getValueType().getScalarSizeInBits() < 32)
Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src);
+ // No legal type found, give up.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType()))
+ return SDValue();
+
// See if we can use the 32-bit instruction instead of the 64-bit one for a
// shorter encoding. Since the former takes the modulo 32 of BitNo and the
// latter takes the modulo 64, this is only valid if the 5th bit of BitNo is
@@ -23603,8 +23606,7 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
if (Src.getValueType() != BitNo.getValueType())
BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);
- X86CC = DAG.getTargetConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B,
- dl, MVT::i8);
+ X86CC = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
return DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo);
}
@@ -24310,8 +24312,11 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && isNullConstant(Op1) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
- if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CC))
+ X86::CondCode X86CondCode;
+ if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CondCode)) {
+ X86CC = DAG.getTargetConstant(X86CondCode, dl, MVT::i8);
return BT;
+ }
}
// Try to use PTEST/PMOVMSKB for a tree ORs equality compared with 0.
@@ -24783,9 +24788,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
- SDValue BTCC;
- if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, DL, DAG, BTCC)) {
- CC = BTCC;
+ X86::CondCode X86CondCode;
+ if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, DL, DAG, X86CondCode)) {
+ CC = DAG.getTargetConstant(X86CondCode, DL, MVT::i8);
Cond = BT;
AddTest = false;
}
@@ -52294,6 +52299,7 @@ static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
/// If this is an add or subtract where one operand is produced by a cmp+setcc,
/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
/// with CMP+{ADC, SBB}.
+/// Also try (ADD/SUB)+(AND(SRL,1)) bit extraction pattern with BT+{ADC, SBB}.
static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,
SDValue X, SDValue Y,
SelectionDAG &DAG) {
@@ -52304,11 +52310,20 @@ static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,
if (Y.getOpcode() == ISD::ZERO_EXTEND && Y.hasOneUse())
Y = Y.getOperand(0);
- if (Y.getOpcode() != X86ISD::SETCC || !Y.hasOneUse())
+ if (!Y.hasOneUse())
return SDValue();
- X86::CondCode CC = (X86::CondCode)Y.getConstantOperandVal(0);
- SDValue EFLAGS = Y.getOperand(1);
+ X86::CondCode CC;
+ SDValue EFLAGS;
+ if (Y.getOpcode() == X86ISD::SETCC) {
+ CC = (X86::CondCode)Y.getConstantOperandVal(0);
+ EFLAGS = Y.getOperand(1);
+ } else if (Y.getOpcode() == ISD::AND && isOneConstant(Y.getOperand(1))) {
+ EFLAGS = LowerAndToBT(Y, ISD::SETNE, DL, DAG, CC);
+ }
+
+ if (!EFLAGS)
+ return SDValue();
// If X is -1 or 0, then we have an opportunity to avoid constants required in
// the general case below.
diff --git a/llvm/test/CodeGen/X86/add-sub-bool.ll b/llvm/test/CodeGen/X86/add-sub-bool.ll
index 10e6fb2f4dfd1..f2dfc05b9843d 100644
--- a/llvm/test/CodeGen/X86/add-sub-bool.ll
+++ b/llvm/test/CodeGen/X86/add-sub-bool.ll
@@ -18,31 +18,16 @@ define i32 @test_i32_add_add_idx(i32 %x, i32 %y, i32 %z) nounwind {
; X86-LABEL: test_i32_add_add_idx:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: shrl $30, %eax
-; X86-NEXT: andl $1, %eax
-; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: btl $30, {{[0-9]+}}(%esp)
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
-; NOTBM-LABEL: test_i32_add_add_idx:
-; NOTBM: # %bb.0:
-; NOTBM-NEXT: # kill: def $esi killed $esi def $rsi
-; NOTBM-NEXT: # kill: def $edi killed $edi def $rdi
-; NOTBM-NEXT: leal (%rdi,%rsi), %eax
-; NOTBM-NEXT: shrl $30, %edx
-; NOTBM-NEXT: andl $1, %edx
-; NOTBM-NEXT: addl %edx, %eax
-; NOTBM-NEXT: retq
-;
-; TBM-LABEL: test_i32_add_add_idx:
-; TBM: # %bb.0:
-; TBM-NEXT: # kill: def $esi killed $esi def $rsi
-; TBM-NEXT: # kill: def $edi killed $edi def $rdi
-; TBM-NEXT: bextrl $286, %edx, %eax # imm = 0x11E
-; TBM-NEXT: addl %edi, %eax
-; TBM-NEXT: addl %esi, %eax
-; TBM-NEXT: retq
+; X64-LABEL: test_i32_add_add_idx:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: btl $30, %edx
+; X64-NEXT: adcl %esi, %eax
+; X64-NEXT: retq
%add = add i32 %y, %x
%shift = lshr i32 %z, 30
%mask = and i32 %shift, 1
@@ -54,31 +39,16 @@ define i32 @test_i32_add_add_commute_idx(i32 %x, i32 %y, i32 %z) nounwind {
; X86-LABEL: test_i32_add_add_commute_idx:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: shrl $2, %eax
-; X86-NEXT: andl $1, %eax
-; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: btl $2, {{[0-9]+}}(%esp)
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
-; NOTBM-LABEL: test_i32_add_add_commute_idx:
-; NOTBM: # %bb.0:
-; NOTBM-NEXT: # kill: def $esi killed $esi def $rsi
-; NOTBM-NEXT: # kill: def $edi killed $edi def $rdi
-; NOTBM-NEXT: leal (%rdi,%rsi), %eax
-; NOTBM-NEXT: shrl $2, %edx
-; NOTBM-NEXT: andl $1, %edx
-; NOTBM-NEXT: addl %edx, %eax
-; NOTBM-NEXT: retq
-;
-; TBM-LABEL: test_i32_add_add_commute_idx:
-; TBM: # %bb.0:
-; TBM-NEXT: # kill: def $esi killed $esi def $rsi
-; TBM-NEXT: # kill: def $edi killed $edi def $rdi
-; TBM-NEXT: bextrl $258, %edx, %eax # imm = 0x102
-; TBM-NEXT: addl %edi, %eax
-; TBM-NEXT: addl %esi, %eax
-; TBM-NEXT: retq
+; X64-LABEL: test_i32_add_add_commute_idx:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: btl $2, %edx
+; X64-NEXT: adcl %esi, %eax
+; X64-NEXT: retq
%add = add i32 %y, %x
%shift = lshr i32 %z, 2
%mask = and i32 %shift, 1
@@ -110,6 +80,27 @@ define i32 @test_i32_add_add_idx0(i32 %x, i32 %y, i32 %z) nounwind {
ret i32 %add1
}
+define i24 @test_i24_add_add_idx(i24 %x, i24 %y, i24 %z) nounwind {
+; X86-LABEL: test_i24_add_add_idx:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: btl $15, {{[0-9]+}}(%esp)
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: test_i24_add_add_idx:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: btl $15, %edx
+; X64-NEXT: adcl %esi, %eax
+; X64-NEXT: retq
+ %add = add i24 %y, %x
+ %shift = lshr i24 %z, 15
+ %mask = and i24 %shift, 1
+ %add1 = add i24 %add, %mask
+ ret i24 %add1
+}
+
define i128 @test_i128_add_add_idx(i128 %x, i128 %y, i128 %z) nounwind {
; X86-LABEL: test_i128_add_add_idx:
; X86: # %bb.0:
@@ -198,29 +189,18 @@ define i32 @test_i32_add_sub_commute_idx(i32 %x, i32 %y, i32 %z) nounwind {
; X86-LABEL: test_i32_add_sub_commute_idx:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: shrl $8, %eax
-; X86-NEXT: andl $1, %eax
-; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: btl $8, {{[0-9]+}}(%esp)
+; X86-NEXT: adcl $0, %eax
; X86-NEXT: retl
;
-; NOTBM-LABEL: test_i32_add_sub_commute_idx:
-; NOTBM: # %bb.0:
-; NOTBM-NEXT: # kill: def $edx killed $edx def $rdx
-; NOTBM-NEXT: # kill: def $edi killed $edi def $rdi
-; NOTBM-NEXT: subl %esi, %edi
-; NOTBM-NEXT: shrl $8, %edx
-; NOTBM-NEXT: andl $1, %edx
-; NOTBM-NEXT: leal (%rdx,%rdi), %eax
-; NOTBM-NEXT: retq
-;
-; TBM-LABEL: test_i32_add_sub_commute_idx:
-; TBM: # %bb.0:
-; TBM-NEXT: subl %esi, %edi
-; TBM-NEXT: bextrl $264, %edx, %eax # imm = 0x108
-; TBM-NEXT: addl %edi, %eax
-; TBM-NEXT: retq
+; X64-LABEL: test_i32_add_sub_commute_idx:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: subl %esi, %eax
+; X64-NEXT: btl $8, %edx
+; X64-NEXT: adcl $0, %eax
+; X64-NEXT: retq
%sub = sub i32 %x, %y
%shift = lshr i32 %z, 8
%mask = and i32 %shift, 1
@@ -231,32 +211,20 @@ define i32 @test_i32_add_sub_commute_idx(i32 %x, i32 %y, i32 %z) nounwind {
define i32 @test_i32_sub_add_idx(i32 %x, i32 %y, i32 %z) nounwind {
; X86-LABEL: test_i32_sub_add_idx:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: shrl %ecx
-; X86-NEXT: andl $1, %ecx
-; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: btl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: sbbl $0, %eax
; X86-NEXT: retl
;
-; NOTBM-LABEL: test_i32_sub_add_idx:
-; NOTBM: # %bb.0:
-; NOTBM-NEXT: # kill: def $esi killed $esi def $rsi
-; NOTBM-NEXT: # kill: def $edi killed $edi def $rdi
-; NOTBM-NEXT: leal (%rdi,%rsi), %eax
-; NOTBM-NEXT: shrl %edx
-; NOTBM-NEXT: andl $1, %edx
-; NOTBM-NEXT: subl %edx, %eax
-; NOTBM-NEXT: retq
-;
-; TBM-LABEL: test_i32_sub_add_idx:
-; TBM: # %bb.0:
-; TBM-NEXT: # kill: def $esi killed $esi def $rsi
-; TBM-NEXT: # kill: def $edi killed $edi def $rdi
-; TBM-NEXT: leal (%rdi,%rsi), %eax
-; TBM-NEXT: bextrl $257, %edx, %ecx # imm = 0x101
-; TBM-NEXT: subl %ecx, %eax
-; TBM-NEXT: retq
+; X64-LABEL: test_i32_sub_add_idx:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal (%rdi,%rsi), %eax
+; X64-NEXT: btl $1, %edx
+; X64-NEXT: sbbl $0, %eax
+; X64-NEXT: retq
%add = add i32 %y, %x
%shift = lshr i32 %z, 1
%mask = and i32 %shift, 1
@@ -268,28 +236,18 @@ define i32 @test_i32_sub_sub_idx(i32 %x, i32 %y, i32 %z) nounwind {
; X86-LABEL: test_i32_sub_sub_idx:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: shrl $16, %eax
-; X86-NEXT: andl $1, %eax
+; X86-NEXT: btl $16, {{[0-9]+}}(%esp)
+; X86-NEXT: adcl $0, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
-; NOTBM-LABEL: test_i32_sub_sub_idx:
-; NOTBM: # %bb.0:
-; NOTBM-NEXT: # kill: def $edx killed $edx def $rdx
-; NOTBM-NEXT: # kill: def $edi killed $edi def $rdi
-; NOTBM-NEXT: shrl $16, %edx
-; NOTBM-NEXT: andl $1, %edx
-; NOTBM-NEXT: subl %esi, %edx
-; NOTBM-NEXT: leal (%rdx,%rdi), %eax
-; NOTBM-NEXT: retq
-;
-; TBM-LABEL: test_i32_sub_sub_idx:
-; TBM: # %bb.0:
-; TBM-NEXT: bextrl $272, %edx, %eax # imm = 0x110
-; TBM-NEXT: subl %esi, %eax
-; TBM-NEXT: addl %edi, %eax
-; TBM-NEXT: retq
+; X64-LABEL: test_i32_sub_sub_idx:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: btl $16, %edx
+; X64-NEXT: adcl $0, %eax
+; X64-NEXT: subl %esi, %eax
+; X64-NEXT: retq
%shift = lshr i32 %z, 16
%mask = and i32 %shift, 1
%sub0 = sub i32 %y, %mask
@@ -301,29 +259,16 @@ define i32 @test_i32_sub_sub_commute_idx(i32 %x, i32 %y, i32 %z) nounwind {
; X86-LABEL: test_i32_sub_sub_commute_idx:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: shrl $15, %ecx
-; X86-NEXT: andl $1, %ecx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: btl $15, {{[0-9]+}}(%esp)
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
-; NOTBM-LABEL: test_i32_sub_sub_commute_idx:
-; NOTBM: # %bb.0:
-; NOTBM-NEXT: movl %edi, %eax
-; NOTBM-NEXT: shrl $15, %edx
-; NOTBM-NEXT: andl $1, %edx
-; NOTBM-NEXT: subl %esi, %eax
-; NOTBM-NEXT: subl %edx, %eax
-; NOTBM-NEXT: retq
-;
-; TBM-LABEL: test_i32_sub_sub_commute_idx:
-; TBM: # %bb.0:
-; TBM-NEXT: movl %edi, %eax
-; TBM-NEXT: bextrl $271, %edx, %ecx # imm = 0x10F
-; TBM-NEXT: subl %esi, %eax
-; TBM-NEXT: subl %ecx, %eax
-; TBM-NEXT: retq
+; X64-LABEL: test_i32_sub_sub_commute_idx:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: btl $15, %edx
+; X64-NEXT: sbbl %esi, %eax
+; X64-NEXT: retq
%shift = lshr i32 %z, 15
%mask = and i32 %shift, 1
%sub0 = sub i32 %x, %y
@@ -334,29 +279,22 @@ define i32 @test_i32_sub_sub_commute_idx(i32 %x, i32 %y, i32 %z) nounwind {
define i32 @test_i32_sub_sum_idx(i32 %x, i32 %y, i32 %z) nounwind {
; X86-LABEL: test_i32_sub_sum_idx:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: shrl $30, %eax
-; X86-NEXT: andl $1, %eax
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: btl $30, {{[0-9]+}}(%esp)
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: negl %eax
; X86-NEXT: retl
;
-; NOTBM-LABEL: test_i32_sub_sum_idx:
-; NOTBM: # %bb.0:
-; NOTBM-NEXT: movl %edx, %eax
-; NOTBM-NEXT: shrl $30, %eax
-; NOTBM-NEXT: andl $1, %eax
-; NOTBM-NEXT: addl %esi, %edi
-; NOTBM-NEXT: subl %edi, %eax
-; NOTBM-NEXT: retq
-;
-; TBM-LABEL: test_i32_sub_sum_idx:
-; TBM: # %bb.0:
-; TBM-NEXT: bextrl $286, %edx, %eax # imm = 0x11E
-; TBM-NEXT: addl %esi, %edi
-; TBM-NEXT: subl %edi, %eax
-; TBM-NEXT: retq
+; X64-LABEL: test_i32_sub_sum_idx:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal (%rdi,%rsi), %eax
+; X64-NEXT: btl $30, %edx
+; X64-NEXT: sbbl $0, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: retq
%shift = lshr i32 %z, 30
%mask = and i32 %shift, 1
%add = add i32 %y, %x
@@ -371,24 +309,18 @@ define i32 @test_i32_sub_sum_idx(i32 %x, i32 %y, i32 %z) nounwind {
define i32 @test_i32_add_add_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
; X86-LABEL: test_i32_add_add_var:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: addl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: shrl %cl, %eax
-; X86-NEXT: andl $1, %eax
-; X86-NEXT: addl %edx, %eax
+; X86-NEXT: btl %ecx, %edx
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: test_i32_add_add_var:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: leal (%rdi,%rsi), %eax
-; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shrl %cl, %edx
-; X64-NEXT: andl $1, %edx
-; X64-NEXT: addl %edx, %eax
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: btl %ecx, %edx
+; X64-NEXT: adcl %esi, %eax
; X64-NEXT: retq
%add = add i32 %y, %x
%shift = lshr i32 %z, %w
@@ -400,24 +332,18 @@ define i32 @test_i32_add_add_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
define i32 @test_i32_add_add_commute_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
; X86-LABEL: test_i32_add_add_commute_var:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: addl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: shrl %cl, %eax
-; X86-NEXT: andl $1, %eax
-; X86-NEXT: addl %edx, %eax
+; X86-NEXT: btl %ecx, %edx
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: test_i32_add_add_commute_var:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: leal (%rdi,%rsi), %eax
-; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shrl %cl, %edx
-; X64-NEXT: andl $1, %edx
-; X64-NEXT: addl %edx, %eax
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: btl %ecx, %edx
+; X64-NEXT: adcl %esi, %eax
; X64-NEXT: retq
%add = add i32 %y, %x
%shift = lshr i32 %z, %w
@@ -443,10 +369,10 @@ define i64 @test_i64_add_add_var(i64 %x, i64 %y, i64 %z, i64 %w) nounwind {
; X86-NEXT: shrl %cl, %edi
; X86-NEXT: shrdl %cl, %ebx, %esi
; X86-NEXT: testb $32, %cl
-; X86-NEXT: jne .LBB12_2
+; X86-NEXT: jne .LBB13_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl %esi, %edi
-; X86-NEXT: .LBB12_2:
+; X86-NEXT: .LBB13_2:
; X86-NEXT: andl $1, %edi
; X86-NEXT: addl %edi, %eax
; X86-NEXT: adcl $0, %edx
@@ -457,11 +383,9 @@ define i64 @test_i64_add_add_var(i64 %x, i64 %y, i64 %z, i64 %w) nounwind {
;
; X64-LABEL: test_i64_add_add_var:
; X64: # %bb.0:
-; X64-NEXT: leaq (%rdi,%rsi), %rax
-; X64-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-NEXT: shrq %cl, %rdx
-; X64-NEXT: andl $1, %edx
-; X64-NEXT: addq %rdx, %rax
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: btq %rcx, %rdx
+; X64-NEXT: adcq %rsi, %rax
; X64-NEXT: retq
%add = add i64 %y, %x
%shift = lshr i64 %z, %w
@@ -473,24 +397,20 @@ define i64 @test_i64_add_add_var(i64 %x, i64 %y, i64 %z, i64 %w) nounwind {
define i32 @test_i32_add_sub_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
; X86-LABEL: test_i32_add_sub_var:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: shrl %cl, %eax
-; X86-NEXT: andl $1, %eax
-; X86-NEXT: addl %edx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: btl %ecx, %edx
+; X86-NEXT: adcl $0, %eax
; X86-NEXT: retl
;
; X64-LABEL: test_i32_add_sub_var:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $edx killed $edx def $rdx
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: subl %esi, %edi
-; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shrl %cl, %edx
-; X64-NEXT: andl $1, %edx
-; X64-NEXT: leal (%rdx,%rdi), %eax
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: subl %esi, %eax
+; X64-NEXT: btl %ecx, %edx
+; X64-NEXT: adcl $0, %eax
; X64-NEXT: retq
%sub = sub i32 %x, %y
%shift = lshr i32 %z, %w
@@ -502,24 +422,20 @@ define i32 @test_i32_add_sub_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
define i32 @test_i32_add_sub_commute_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
; X86-LABEL: test_i32_add_sub_commute_var:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: shrl %cl, %eax
-; X86-NEXT: andl $1, %eax
-; X86-NEXT: addl %edx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: btl %ecx, %edx
+; X86-NEXT: adcl $0, %eax
; X86-NEXT: retl
;
; X64-LABEL: test_i32_add_sub_commute_var:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $edx killed $edx def $rdx
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: subl %esi, %edi
-; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shrl %cl, %edx
-; X64-NEXT: andl $1, %edx
-; X64-NEXT: leal (%rdx,%rdi), %eax
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: subl %esi, %eax
+; X64-NEXT: btl %ecx, %edx
+; X64-NEXT: adcl $0, %eax
; X64-NEXT: retq
%sub = sub i32 %x, %y
%shift = lshr i32 %z, %w
@@ -531,13 +447,12 @@ define i32 @test_i32_add_sub_commute_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwin
define i32 @test_i32_sub_add_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
; X86-LABEL: test_i32_sub_add_var:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: shrl %cl, %edx
-; X86-NEXT: andl $1, %edx
-; X86-NEXT: subl %edx, %eax
+; X86-NEXT: btl %ecx, %edx
+; X86-NEXT: sbbl $0, %eax
; X86-NEXT: retl
;
; X64-LABEL: test_i32_sub_add_var:
@@ -545,10 +460,8 @@ define i32 @test_i32_sub_add_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal (%rdi,%rsi), %eax
-; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shrl %cl, %edx
-; X64-NEXT: andl $1, %edx
-; X64-NEXT: subl %edx, %eax
+; X64-NEXT: btl %ecx, %edx
+; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq
%add = add i32 %y, %x
%shift = lshr i32 %z, %w
@@ -560,23 +473,20 @@ define i32 @test_i32_sub_add_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
define i32 @test_i32_sub_sub_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
; X86-LABEL: test_i32_sub_sub_var:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: shrl %cl, %eax
-; X86-NEXT: andl $1, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: btl %ecx, %edx
+; X86-NEXT: adcl $0, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: test_i32_sub_sub_var:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $edx killed $edx def $rdx
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shrl %cl, %edx
-; X64-NEXT: andl $1, %edx
-; X64-NEXT: subl %esi, %edx
-; X64-NEXT: leal (%rdx,%rdi), %eax
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: btl %ecx, %edx
+; X64-NEXT: adcl $0, %eax
+; X64-NEXT: subl %esi, %eax
; X64-NEXT: retq
%shift = lshr i32 %z, %w
%mask = and i32 %shift, 1
@@ -589,22 +499,17 @@ define i32 @test_i32_sub_sub_commute_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwin
; X86-LABEL: test_i32_sub_sub_commute_var:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: shrl %cl, %edx
-; X86-NEXT: andl $1, %edx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: subl %edx, %eax
+; X86-NEXT: btl %ecx, %edx
+; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: test_i32_sub_sub_commute_var:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shrl %cl, %edx
-; X64-NEXT: andl $1, %edx
-; X64-NEXT: subl %esi, %eax
-; X64-NEXT: subl %edx, %eax
+; X64-NEXT: btl %ecx, %edx
+; X64-NEXT: sbbl %esi, %eax
; X64-NEXT: retq
%shift = lshr i32 %z, %w
%mask = and i32 %shift, 1
@@ -616,23 +521,23 @@ define i32 @test_i32_sub_sub_commute_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwin
define i32 @test_i32_sub_sum_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind {
; X86-LABEL: test_i32_sub_sum_var:
; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: shrl %cl, %eax
-; X86-NEXT: andl $1, %eax
-; X86-NEXT: addl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: subl %edx, %eax
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: btl %ecx, %edx
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: negl %eax
; X86-NEXT: retl
;
; X64-LABEL: test_i32_sub_sum_var:
; X64: # %bb.0:
-; X64-NEXT: movl %edx, %eax
-; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shrl %cl, %eax
-; X64-NEXT: andl $1, %eax
-; X64-NEXT: addl %esi, %edi
-; X64-NEXT: subl %edi, %eax
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal (%rdi,%rsi), %eax
+; X64-NEXT: btl %ecx, %edx
+; X64-NEXT: sbbl $0, %eax
+; X64-NEXT: negl %eax
; X64-NEXT: retq
%shift = lshr i32 %z, %w
%mask = and i32 %shift, 1
More information about the llvm-commits
mailing list