[llvm] c64f5d4 - [X86] Attempt to fold EFLAGS into X86ISD::ADD/SUB ops
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue May 17 02:59:45 PDT 2022
Author: Simon Pilgrim
Date: 2022-05-17T10:59:24+01:00
New Revision: c64f5d44ad3bebeaccceb20b2730b4e65bb87297
URL: https://github.com/llvm/llvm-project/commit/c64f5d44ad3bebeaccceb20b2730b4e65bb87297
DIFF: https://github.com/llvm/llvm-project/commit/c64f5d44ad3bebeaccceb20b2730b4e65bb87297.diff
LOG: [X86] Attempt to fold EFLAGS into X86ISD::ADD/SUB ops
We already use combineAddOrSubToADCOrSBB to fold extended EFLAGS results into ISD::ADD/SUB ops as X86ISD::ADC/SBB carry ops.
This patch extends this to also try to fold EFLAGS results into X86ISD::ADD/SUB ops.
Differential Revision: https://reviews.llvm.org/D125642
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/add-sub-bool.ll
llvm/test/CodeGen/X86/addcarry.ll
llvm/test/CodeGen/X86/combine-sbb.ll
llvm/test/CodeGen/X86/subcarry.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a84cee543d5fc..f72e8149bca55 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -52270,7 +52270,8 @@ static bool onlyZeroFlagUsed(SDValue Flags) {
/// Also try (ADD/SUB)+(AND(SRL,1)) bit extraction pattern with BT+{ADC, SBB}.
static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,
SDValue X, SDValue Y,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ bool ZeroSecondOpOnly = false) {
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
@@ -52294,7 +52295,7 @@ static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,
// If X is -1 or 0, then we have an opportunity to avoid constants required in
// the general case below.
auto *ConstantX = dyn_cast<ConstantSDNode>(X);
- if (ConstantX) {
+ if (ConstantX && !ZeroSecondOpOnly) {
if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnes()) ||
(IsSub && CC == X86::COND_B && ConstantX->isZero())) {
// This is a complicated way to get -1 or 0 from the carry flag:
@@ -52332,6 +52333,9 @@ static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,
DAG.getConstant(0, DL, VT), EFLAGS);
}
+ if (ZeroSecondOpOnly)
+ return SDValue();
+
if (CC == X86::COND_A) {
// Try to convert COND_A into COND_B in an attempt to facilitate
// materializing "setb reg".
@@ -52589,7 +52593,8 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
MVT VT = LHS.getSimpleValueType();
- unsigned GenericOpc = X86ISD::ADD == N->getOpcode() ? ISD::ADD : ISD::SUB;
+ bool IsSub = X86ISD::SUB == N->getOpcode();
+ unsigned GenericOpc = IsSub ? ISD::SUB : ISD::ADD;
// If we don't use the flag result, simplify back to a generic ADD/SUB.
if (!N->hasAnyUseOfValue(1)) {
@@ -52611,7 +52616,10 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
MatchGeneric(LHS, RHS, false);
MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());
- return SDValue();
+ // TODO: Can we drop the ZeroSecondOpOnly limit? This is to guarantee that the
+ // EFLAGS result doesn't change.
+ return combineAddOrSubToADCOrSBB(IsSub, DL, VT, LHS, RHS, DAG,
+ /*ZeroSecondOpOnly*/ true);
}
static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) {
diff --git a/llvm/test/CodeGen/X86/add-sub-bool.ll b/llvm/test/CodeGen/X86/add-sub-bool.ll
index f93d1598f9aed..248a9f19f1fbe 100644
--- a/llvm/test/CodeGen/X86/add-sub-bool.ll
+++ b/llvm/test/CodeGen/X86/add-sub-bool.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64,NOTBM
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+tbm | FileCheck %s --check-prefixes=X64,TBM
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+tbm | FileCheck %s --check-prefixes=X64
; PR35908 - Fold ADD/SUB and bit extracts into ADC/SBB+BT
;
@@ -104,55 +104,40 @@ define i24 @test_i24_add_add_idx(i24 %x, i24 %y, i24 %z) nounwind {
define i128 @test_i128_add_add_idx(i128 %x, i128 %y, i128 %z) nounwind {
; X86-LABEL: test_i128_add_add_idx:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: addl {{[0-9]+}}(%esp), %esi
; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT: adcl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: shrl $5, %edx
-; X86-NEXT: andl $1, %edx
-; X86-NEXT: addl %ebx, %edx
-; X86-NEXT: adcl $0, %edi
+; X86-NEXT: btl $5, {{[0-9]+}}(%esp)
; X86-NEXT: adcl $0, %esi
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: adcl $0, %edx
; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: movl %edi, 4(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %esi, (%eax)
+; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
; X86-NEXT: retl $4
;
-; NOTBM-LABEL: test_i128_add_add_idx:
-; NOTBM: # %bb.0:
-; NOTBM-NEXT: movq %r9, %rax
-; NOTBM-NEXT: addq %rdx, %rdi
-; NOTBM-NEXT: adcq %rcx, %rsi
-; NOTBM-NEXT: shrl $5, %eax
-; NOTBM-NEXT: andl $1, %eax
-; NOTBM-NEXT: addq %rdi, %rax
-; NOTBM-NEXT: adcq $0, %rsi
-; NOTBM-NEXT: movq %rsi, %rdx
-; NOTBM-NEXT: retq
-;
-; TBM-LABEL: test_i128_add_add_idx:
-; TBM: # %bb.0:
-; TBM-NEXT: addq %rdx, %rdi
-; TBM-NEXT: adcq %rcx, %rsi
-; TBM-NEXT: bextrl $261, %r9d, %eax # imm = 0x105
-; TBM-NEXT: addq %rdi, %rax
-; TBM-NEXT: adcq $0, %rsi
-; TBM-NEXT: movq %rsi, %rdx
-; TBM-NEXT: retq
+; X64-LABEL: test_i128_add_add_idx:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: addq %rdx, %rax
+; X64-NEXT: adcq %rcx, %rsi
+; X64-NEXT: btl $5, %r9d
+; X64-NEXT: adcq $0, %rax
+; X64-NEXT: adcq $0, %rsi
+; X64-NEXT: movq %rsi, %rdx
+; X64-NEXT: retq
%add = add i128 %y, %x
%shift = lshr i128 %z, 69
%mask = and i128 %shift, 1
diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll
index d15a075b583d5..fa196fb233dda 100644
--- a/llvm/test/CodeGen/X86/addcarry.ll
+++ b/llvm/test/CodeGen/X86/addcarry.ll
@@ -713,14 +713,12 @@ define { i64, i64, i1 } @addcarry_mixed_2x64(i64 %x0, i64 %x1, i64 %y0, i64 %y1)
; CHECK-LABEL: addcarry_mixed_2x64:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: xorl %edi, %edi
-; CHECK-NEXT: addq %rdx, %rax
-; CHECK-NEXT: setb %dil
; CHECK-NEXT: addq %rcx, %rsi
-; CHECK-NEXT: setb %dl
-; CHECK-NEXT: subq %rdi, %rsi
+; CHECK-NEXT: setb %dil
+; CHECK-NEXT: addq %rdx, %rax
+; CHECK-NEXT: sbbq $0, %rsi
; CHECK-NEXT: setb %cl
-; CHECK-NEXT: orb %dl, %cl
+; CHECK-NEXT: orb %dil, %cl
; CHECK-NEXT: movq %rsi, %rdx
; CHECK-NEXT: retq
%t0 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x0, i64 %y0)
diff --git a/llvm/test/CodeGen/X86/combine-sbb.ll b/llvm/test/CodeGen/X86/combine-sbb.ll
index 2811b7f86e994..9d0f7ca00c714 100644
--- a/llvm/test/CodeGen/X86/combine-sbb.ll
+++ b/llvm/test/CodeGen/X86/combine-sbb.ll
@@ -58,33 +58,29 @@ declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32)
define void @PR25858_i64(%WideUInt64* sret(%WideUInt64), %WideUInt64*, %WideUInt64*) nounwind {
; X86-LABEL: PR25858_i64:
; X86: # %bb.0: # %top
-; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl (%edi), %ecx
-; X86-NEXT: movl 4(%edi), %edx
-; X86-NEXT: subl (%esi), %ecx
-; X86-NEXT: sbbl 4(%esi), %edx
-; X86-NEXT: setb %bl
-; X86-NEXT: movl 12(%edi), %ebp
+; X86-NEXT: movl (%edi), %edx
+; X86-NEXT: movl 4(%edi), %esi
+; X86-NEXT: movl 12(%edi), %ecx
; X86-NEXT: movl 8(%edi), %edi
-; X86-NEXT: subl 8(%esi), %edi
-; X86-NEXT: sbbl 12(%esi), %ebp
-; X86-NEXT: movzbl %bl, %esi
-; X86-NEXT: subl %esi, %edi
-; X86-NEXT: sbbl $0, %ebp
-; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: subl 8(%ebx), %edi
+; X86-NEXT: sbbl 12(%ebx), %ecx
+; X86-NEXT: subl (%ebx), %edx
+; X86-NEXT: sbbl 4(%ebx), %esi
+; X86-NEXT: sbbl $0, %edi
+; X86-NEXT: sbbl $0, %ecx
+; X86-NEXT: movl %edx, (%eax)
+; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edi, 8(%eax)
-; X86-NEXT: movl %ebp, 12(%eax)
+; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: PR25858_i64:
diff --git a/llvm/test/CodeGen/X86/subcarry.ll b/llvm/test/CodeGen/X86/subcarry.ll
index b4828aa092a3b..cc70e42c3348d 100644
--- a/llvm/test/CodeGen/X86/subcarry.ll
+++ b/llvm/test/CodeGen/X86/subcarry.ll
@@ -312,16 +312,15 @@ define { i64, i64, i1 } @subcarry_2x64_add_reversed(i64 %x0, i64 %x1, i64 %y0, i
; CHECK-LABEL: subcarry_2x64_add_reversed:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: movq %rsi, %rdi
+; CHECK-NEXT: subq %rcx, %rdi
; CHECK-NEXT: subq %rdx, %rax
-; CHECK-NEXT: setb %dil
-; CHECK-NEXT: movq %rsi, %rdx
-; CHECK-NEXT: subq %rcx, %rdx
-; CHECK-NEXT: subq %rdi, %rdx
-; CHECK-NEXT: setb %dil
+; CHECK-NEXT: sbbq $0, %rdi
+; CHECK-NEXT: setb %r8b
; CHECK-NEXT: cmpq %rcx, %rsi
-; CHECK-NEXT: adcb $0, %dil
-; CHECK-NEXT: movl %edi, %ecx
+; CHECK-NEXT: adcb $0, %r8b
+; CHECK-NEXT: movq %rdi, %rdx
+; CHECK-NEXT: movl %r8d, %ecx
; CHECK-NEXT: retq
%t0 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x0, i64 %y0)
%s0 = extractvalue { i64, i1 } %t0, 0
@@ -601,22 +600,20 @@ define void @sub_U256_without_i128_or_recursive(%uint256* sret(%uint256) %0, %ui
; CHECK-LABEL: sub_U256_without_i128_or_recursive:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: movq (%rsi), %r9
-; CHECK-NEXT: movq 8(%rsi), %r10
-; CHECK-NEXT: subq (%rdx), %r9
-; CHECK-NEXT: sbbq 8(%rdx), %r10
-; CHECK-NEXT: setb %r8b
+; CHECK-NEXT: movq (%rsi), %r8
+; CHECK-NEXT: movq 8(%rsi), %r9
; CHECK-NEXT: movq 16(%rsi), %rcx
; CHECK-NEXT: movq 24(%rsi), %rsi
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: subq 16(%rdx), %rcx
; CHECK-NEXT: setb %dil
; CHECK-NEXT: subq 24(%rdx), %rsi
-; CHECK-NEXT: movzbl %r8b, %edx
-; CHECK-NEXT: subq %rdx, %rcx
+; CHECK-NEXT: subq (%rdx), %r8
+; CHECK-NEXT: sbbq 8(%rdx), %r9
+; CHECK-NEXT: sbbq $0, %rcx
; CHECK-NEXT: sbbq %rdi, %rsi
-; CHECK-NEXT: movq %r9, (%rax)
-; CHECK-NEXT: movq %r10, 8(%rax)
+; CHECK-NEXT: movq %r8, (%rax)
+; CHECK-NEXT: movq %r9, 8(%rax)
; CHECK-NEXT: movq %rcx, 16(%rax)
; CHECK-NEXT: movq %rsi, 24(%rax)
; CHECK-NEXT: retq
@@ -668,12 +665,10 @@ define void @sub_U256_without_i128_or_recursive(%uint256* sret(%uint256) %0, %ui
define i1 @subcarry_ult_2x64(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
; CHECK-LABEL: subcarry_ult_2x64:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: cmpq %rdx, %rdi
-; CHECK-NEXT: setb %al
; CHECK-NEXT: subq %rcx, %rsi
; CHECK-NEXT: setb %cl
-; CHECK-NEXT: cmpq %rax, %rsi
+; CHECK-NEXT: cmpq %rdx, %rdi
+; CHECK-NEXT: sbbq $0, %rsi
; CHECK-NEXT: setb %al
; CHECK-NEXT: orb %cl, %al
; CHECK-NEXT: retq
More information about the llvm-commits
mailing list