[llvm] c64f5d4 - [X86] Attempt to fold EFLAGS into X86ISD::ADD/SUB ops

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue May 17 02:59:45 PDT 2022


Author: Simon Pilgrim
Date: 2022-05-17T10:59:24+01:00
New Revision: c64f5d44ad3bebeaccceb20b2730b4e65bb87297

URL: https://github.com/llvm/llvm-project/commit/c64f5d44ad3bebeaccceb20b2730b4e65bb87297
DIFF: https://github.com/llvm/llvm-project/commit/c64f5d44ad3bebeaccceb20b2730b4e65bb87297.diff

LOG: [X86] Attempt to fold EFLAGS into X86ISD::ADD/SUB ops

We already use combineAddOrSubToADCOrSBB to fold extended EFLAGS results into ISD::ADD/SUB ops as X86ISD::ADC/SBB carry ops.

This patch extends this to also try to fold EFLAGS results with X86ISD::ADD/SUB ops.

Differential Revision: https://reviews.llvm.org/D125642

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/add-sub-bool.ll
    llvm/test/CodeGen/X86/addcarry.ll
    llvm/test/CodeGen/X86/combine-sbb.ll
    llvm/test/CodeGen/X86/subcarry.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a84cee543d5fc..f72e8149bca55 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -52270,7 +52270,8 @@ static bool onlyZeroFlagUsed(SDValue Flags) {
 /// Also try (ADD/SUB)+(AND(SRL,1)) bit extraction pattern with BT+{ADC, SBB}.
 static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,
                                          SDValue X, SDValue Y,
-                                         SelectionDAG &DAG) {
+                                         SelectionDAG &DAG,
+                                         bool ZeroSecondOpOnly = false) {
   if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
     return SDValue();
 
@@ -52294,7 +52295,7 @@ static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,
   // If X is -1 or 0, then we have an opportunity to avoid constants required in
   // the general case below.
   auto *ConstantX = dyn_cast<ConstantSDNode>(X);
-  if (ConstantX) {
+  if (ConstantX && !ZeroSecondOpOnly) {
     if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnes()) ||
         (IsSub && CC == X86::COND_B && ConstantX->isZero())) {
       // This is a complicated way to get -1 or 0 from the carry flag:
@@ -52332,6 +52333,9 @@ static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,
                        DAG.getConstant(0, DL, VT), EFLAGS);
   }
 
+  if (ZeroSecondOpOnly)
+    return SDValue();
+
   if (CC == X86::COND_A) {
     // Try to convert COND_A into COND_B in an attempt to facilitate
     // materializing "setb reg".
@@ -52589,7 +52593,8 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
   MVT VT = LHS.getSimpleValueType();
-  unsigned GenericOpc = X86ISD::ADD == N->getOpcode() ? ISD::ADD : ISD::SUB;
+  bool IsSub = X86ISD::SUB == N->getOpcode();
+  unsigned GenericOpc = IsSub ? ISD::SUB : ISD::ADD;
 
   // If we don't use the flag result, simplify back to a generic ADD/SUB.
   if (!N->hasAnyUseOfValue(1)) {
@@ -52611,7 +52616,10 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
   MatchGeneric(LHS, RHS, false);
   MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());
 
-  return SDValue();
+  // TODO: Can we drop the ZeroSecondOpOnly limit? This is to guarantee that the
+  // EFLAGS result doesn't change.
+  return combineAddOrSubToADCOrSBB(IsSub, DL, VT, LHS, RHS, DAG,
+                                   /*ZeroSecondOpOnly*/ true);
 }
 
 static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) {

diff --git a/llvm/test/CodeGen/X86/add-sub-bool.ll b/llvm/test/CodeGen/X86/add-sub-bool.ll
index f93d1598f9aed..248a9f19f1fbe 100644
--- a/llvm/test/CodeGen/X86/add-sub-bool.ll
+++ b/llvm/test/CodeGen/X86/add-sub-bool.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64,NOTBM
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+tbm | FileCheck %s --check-prefixes=X64,TBM
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+tbm | FileCheck %s --check-prefixes=X64
 
 ; PR35908 - Fold ADD/SUB and bit extracts into ADC/SBB+BT
 ;
@@ -104,55 +104,40 @@ define i24 @test_i24_add_add_idx(i24 %x, i24 %y, i24 %z) nounwind {
 define i128 @test_i128_add_add_idx(i128 %x, i128 %y, i128 %z) nounwind {
 ; X86-LABEL: test_i128_add_add_idx:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    addl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    addl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    adcl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    shrl $5, %edx
-; X86-NEXT:    andl $1, %edx
-; X86-NEXT:    addl %ebx, %edx
-; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    btl $5, {{[0-9]+}}(%esp)
 ; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    adcl $0, %edx
 ; X86-NEXT:    adcl $0, %ecx
-; X86-NEXT:    movl %edx, (%eax)
 ; X86-NEXT:    movl %edi, 4(%eax)
-; X86-NEXT:    movl %esi, 8(%eax)
+; X86-NEXT:    movl %esi, (%eax)
+; X86-NEXT:    movl %edx, 8(%eax)
 ; X86-NEXT:    movl %ecx, 12(%eax)
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
-; X86-NEXT:    popl %ebx
 ; X86-NEXT:    retl $4
 ;
-; NOTBM-LABEL: test_i128_add_add_idx:
-; NOTBM:       # %bb.0:
-; NOTBM-NEXT:    movq %r9, %rax
-; NOTBM-NEXT:    addq %rdx, %rdi
-; NOTBM-NEXT:    adcq %rcx, %rsi
-; NOTBM-NEXT:    shrl $5, %eax
-; NOTBM-NEXT:    andl $1, %eax
-; NOTBM-NEXT:    addq %rdi, %rax
-; NOTBM-NEXT:    adcq $0, %rsi
-; NOTBM-NEXT:    movq %rsi, %rdx
-; NOTBM-NEXT:    retq
-;
-; TBM-LABEL: test_i128_add_add_idx:
-; TBM:       # %bb.0:
-; TBM-NEXT:    addq %rdx, %rdi
-; TBM-NEXT:    adcq %rcx, %rsi
-; TBM-NEXT:    bextrl $261, %r9d, %eax # imm = 0x105
-; TBM-NEXT:    addq %rdi, %rax
-; TBM-NEXT:    adcq $0, %rsi
-; TBM-NEXT:    movq %rsi, %rdx
-; TBM-NEXT:    retq
+; X64-LABEL: test_i128_add_add_idx:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    addq %rdx, %rax
+; X64-NEXT:    adcq %rcx, %rsi
+; X64-NEXT:    btl $5, %r9d
+; X64-NEXT:    adcq $0, %rax
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq %rsi, %rdx
+; X64-NEXT:    retq
   %add = add i128 %y, %x
   %shift = lshr i128 %z, 69
   %mask = and i128 %shift, 1

diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll
index d15a075b583d5..fa196fb233dda 100644
--- a/llvm/test/CodeGen/X86/addcarry.ll
+++ b/llvm/test/CodeGen/X86/addcarry.ll
@@ -713,14 +713,12 @@ define { i64, i64, i1 } @addcarry_mixed_2x64(i64 %x0, i64 %x1, i64 %y0, i64 %y1)
 ; CHECK-LABEL: addcarry_mixed_2x64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    xorl %edi, %edi
-; CHECK-NEXT:    addq %rdx, %rax
-; CHECK-NEXT:    setb %dil
 ; CHECK-NEXT:    addq %rcx, %rsi
-; CHECK-NEXT:    setb %dl
-; CHECK-NEXT:    subq %rdi, %rsi
+; CHECK-NEXT:    setb %dil
+; CHECK-NEXT:    addq %rdx, %rax
+; CHECK-NEXT:    sbbq $0, %rsi
 ; CHECK-NEXT:    setb %cl
-; CHECK-NEXT:    orb %dl, %cl
+; CHECK-NEXT:    orb %dil, %cl
 ; CHECK-NEXT:    movq %rsi, %rdx
 ; CHECK-NEXT:    retq
   %t0 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x0, i64 %y0)

diff --git a/llvm/test/CodeGen/X86/combine-sbb.ll b/llvm/test/CodeGen/X86/combine-sbb.ll
index 2811b7f86e994..9d0f7ca00c714 100644
--- a/llvm/test/CodeGen/X86/combine-sbb.ll
+++ b/llvm/test/CodeGen/X86/combine-sbb.ll
@@ -58,33 +58,29 @@ declare  { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32)
 define void @PR25858_i64(%WideUInt64* sret(%WideUInt64), %WideUInt64*, %WideUInt64*) nounwind {
 ; X86-LABEL: PR25858_i64:
 ; X86:       # %bb.0: # %top
-; X86-NEXT:    pushl %ebp
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    movl (%edi), %ecx
-; X86-NEXT:    movl 4(%edi), %edx
-; X86-NEXT:    subl (%esi), %ecx
-; X86-NEXT:    sbbl 4(%esi), %edx
-; X86-NEXT:    setb %bl
-; X86-NEXT:    movl 12(%edi), %ebp
+; X86-NEXT:    movl (%edi), %edx
+; X86-NEXT:    movl 4(%edi), %esi
+; X86-NEXT:    movl 12(%edi), %ecx
 ; X86-NEXT:    movl 8(%edi), %edi
-; X86-NEXT:    subl 8(%esi), %edi
-; X86-NEXT:    sbbl 12(%esi), %ebp
-; X86-NEXT:    movzbl %bl, %esi
-; X86-NEXT:    subl %esi, %edi
-; X86-NEXT:    sbbl $0, %ebp
-; X86-NEXT:    movl %ecx, (%eax)
-; X86-NEXT:    movl %edx, 4(%eax)
+; X86-NEXT:    subl 8(%ebx), %edi
+; X86-NEXT:    sbbl 12(%ebx), %ecx
+; X86-NEXT:    subl (%ebx), %edx
+; X86-NEXT:    sbbl 4(%ebx), %esi
+; X86-NEXT:    sbbl $0, %edi
+; X86-NEXT:    sbbl $0, %ecx
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    movl %esi, 4(%eax)
 ; X86-NEXT:    movl %edi, 8(%eax)
-; X86-NEXT:    movl %ebp, 12(%eax)
+; X86-NEXT:    movl %ecx, 12(%eax)
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx
-; X86-NEXT:    popl %ebp
 ; X86-NEXT:    retl $4
 ;
 ; X64-LABEL: PR25858_i64:

diff --git a/llvm/test/CodeGen/X86/subcarry.ll b/llvm/test/CodeGen/X86/subcarry.ll
index b4828aa092a3b..cc70e42c3348d 100644
--- a/llvm/test/CodeGen/X86/subcarry.ll
+++ b/llvm/test/CodeGen/X86/subcarry.ll
@@ -312,16 +312,15 @@ define { i64, i64, i1 } @subcarry_2x64_add_reversed(i64 %x0, i64 %x1, i64 %y0, i
 ; CHECK-LABEL: subcarry_2x64_add_reversed:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    movq %rsi, %rdi
+; CHECK-NEXT:    subq %rcx, %rdi
 ; CHECK-NEXT:    subq %rdx, %rax
-; CHECK-NEXT:    setb %dil
-; CHECK-NEXT:    movq %rsi, %rdx
-; CHECK-NEXT:    subq %rcx, %rdx
-; CHECK-NEXT:    subq %rdi, %rdx
-; CHECK-NEXT:    setb %dil
+; CHECK-NEXT:    sbbq $0, %rdi
+; CHECK-NEXT:    setb %r8b
 ; CHECK-NEXT:    cmpq %rcx, %rsi
-; CHECK-NEXT:    adcb $0, %dil
-; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    adcb $0, %r8b
+; CHECK-NEXT:    movq %rdi, %rdx
+; CHECK-NEXT:    movl %r8d, %ecx
 ; CHECK-NEXT:    retq
   %t0 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x0, i64 %y0)
   %s0 = extractvalue { i64, i1 } %t0, 0
@@ -601,22 +600,20 @@ define void @sub_U256_without_i128_or_recursive(%uint256* sret(%uint256) %0, %ui
 ; CHECK-LABEL: sub_U256_without_i128_or_recursive:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    movq (%rsi), %r9
-; CHECK-NEXT:    movq 8(%rsi), %r10
-; CHECK-NEXT:    subq (%rdx), %r9
-; CHECK-NEXT:    sbbq 8(%rdx), %r10
-; CHECK-NEXT:    setb %r8b
+; CHECK-NEXT:    movq (%rsi), %r8
+; CHECK-NEXT:    movq 8(%rsi), %r9
 ; CHECK-NEXT:    movq 16(%rsi), %rcx
 ; CHECK-NEXT:    movq 24(%rsi), %rsi
 ; CHECK-NEXT:    xorl %edi, %edi
 ; CHECK-NEXT:    subq 16(%rdx), %rcx
 ; CHECK-NEXT:    setb %dil
 ; CHECK-NEXT:    subq 24(%rdx), %rsi
-; CHECK-NEXT:    movzbl %r8b, %edx
-; CHECK-NEXT:    subq %rdx, %rcx
+; CHECK-NEXT:    subq (%rdx), %r8
+; CHECK-NEXT:    sbbq 8(%rdx), %r9
+; CHECK-NEXT:    sbbq $0, %rcx
 ; CHECK-NEXT:    sbbq %rdi, %rsi
-; CHECK-NEXT:    movq %r9, (%rax)
-; CHECK-NEXT:    movq %r10, 8(%rax)
+; CHECK-NEXT:    movq %r8, (%rax)
+; CHECK-NEXT:    movq %r9, 8(%rax)
 ; CHECK-NEXT:    movq %rcx, 16(%rax)
 ; CHECK-NEXT:    movq %rsi, 24(%rax)
 ; CHECK-NEXT:    retq
@@ -668,12 +665,10 @@ define void @sub_U256_without_i128_or_recursive(%uint256* sret(%uint256) %0, %ui
 define i1 @subcarry_ult_2x64(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
 ; CHECK-LABEL: subcarry_ult_2x64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpq %rdx, %rdi
-; CHECK-NEXT:    setb %al
 ; CHECK-NEXT:    subq %rcx, %rsi
 ; CHECK-NEXT:    setb %cl
-; CHECK-NEXT:    cmpq %rax, %rsi
+; CHECK-NEXT:    cmpq %rdx, %rdi
+; CHECK-NEXT:    sbbq $0, %rsi
 ; CHECK-NEXT:    setb %al
 ; CHECK-NEXT:    orb %cl, %al
 ; CHECK-NEXT:    retq


        


More information about the llvm-commits mailing list