[llvm] 0b00cd1 - [X86] selectLEAAddr - relax heuristic to only require one operand to be a MathWithFlags op (PR46809)

Tue Feb 8 07:09:35 PST 2022

Author: Simon Pilgrim
Date: 2022-02-08T15:09:22Z
New Revision: 0b00cd19e6f3ec168b9930f235bafd5e00253dbf

URL: https://github.com/llvm/llvm-project/commit/0b00cd19e6f3ec168b9930f235bafd5e00253dbf
DIFF: https://github.com/llvm/llvm-project/commit/0b00cd19e6f3ec168b9930f235bafd5e00253dbf.diff

LOG: [X86] selectLEAAddr - relax heuristic to only require one operand to be a MathWithFlags op (PR46809)

As suggested by @craig.topper, relax the LEA matching heuristic so that only one of the ADD's operands (rather than both) needs to be a flag-producing math op. This helps avoid duplicating the math op when its EFLAGS result is consumed by a later, dependent instruction.

There was some concern that the heuristic is too simple, since it does not account for load-folding opportunities that are lost when an LEA is used, but some basic tests (included in select-lea.ll) don't suggest that this is really a problem.

Differential Revision: https://reviews.llvm.org/D118128

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/test/CodeGen/X86/combine-add.ll
    llvm/test/CodeGen/X86/select-lea.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 54cb88d037678..17f4b1ec5bf78 100644

--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2796,10 +2796,9 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
         return false;
       }
     };
-    // TODO: This could be an 'or' rather than 'and' to make the transform more
-    //       likely to happen. We might want to factor in whether there's a
-    //       load folding opportunity for the math op that disappears with LEA.
-    if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1)))
+    // TODO: We might want to factor in whether there's a load folding
+    // opportunity for the math op that disappears with LEA.
+    if (isMathWithFlags(N.getOperand(0)) || isMathWithFlags(N.getOperand(1)))
       Complexity++;
   }
 

diff  --git a/llvm/test/CodeGen/X86/combine-add.ll b/llvm/test/CodeGen/X86/combine-add.ll
index 0c38d41190e09..5c3e17849b2a8 100644
--- a/llvm/test/CodeGen/X86/combine-add.ll
+++ b/llvm/test/CodeGen/X86/combine-add.ll
@@ -381,11 +381,12 @@ declare {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
 define i1 @sadd_add(i32 %a, i32 %b, i32* %p) {
 ; CHECK-LABEL: sadd_add:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
 ; CHECK-NEXT:    notl %edi
 ; CHECK-NEXT:    addl %esi, %edi
 ; CHECK-NEXT:    seto %al
-; CHECK-NEXT:    incl %edi
-; CHECK-NEXT:    movl %edi, (%rdx)
+; CHECK-NEXT:    leal 1(%rdi), %ecx
+; CHECK-NEXT:    movl %ecx, (%rdx)
 ; CHECK-NEXT:    retq
   %nota = xor i32 %a, -1
   %a0 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %nota, i32 %b)

diff  --git a/llvm/test/CodeGen/X86/select-lea.ll b/llvm/test/CodeGen/X86/select-lea.ll
index df4a760eb339e..a9dd3f6402728 100644
--- a/llvm/test/CodeGen/X86/select-lea.ll
+++ b/llvm/test/CodeGen/X86/select-lea.ll
@@ -8,34 +8,27 @@
 define i32 @sadd_add_imm(i32 %x, i32 %y) {
 ; X64-LABEL: sadd_add_imm:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    leal (%rdi,%rsi), %eax
-; X64-NEXT:    addl $100, %eax
 ; X64-NEXT:    addl %esi, %edi
+; X64-NEXT:    leal 100(%rdi), %eax
 ; X64-NEXT:    cmovnol %edi, %eax
 ; X64-NEXT:    retq
 ;
 ; CMOV-LABEL: sadd_add_imm:
 ; CMOV:       # %bb.0:
-; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CMOV-NEXT:    leal (%eax,%ecx), %edx
-; CMOV-NEXT:    addl $100, %edx
-; CMOV-NEXT:    addl %ecx, %eax
-; CMOV-NEXT:    cmovol %edx, %eax
+; CMOV-NEXT:    addl {{[0-9]+}}(%esp), %ecx
+; CMOV-NEXT:    leal 100(%ecx), %eax
+; CMOV-NEXT:    cmovnol %ecx, %eax
 ; CMOV-NEXT:    retl
 ;
 ; NOCMOV-LABEL: sadd_add_imm:
 ; NOCMOV:       # %bb.0:
 ; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; NOCMOV-NEXT:    leal (%eax,%edx), %ecx
-; NOCMOV-NEXT:    addl %edx, %eax
+; NOCMOV-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; NOCMOV-NEXT:    jno .LBB0_2
 ; NOCMOV-NEXT:  # %bb.1:
-; NOCMOV-NEXT:    addl $100, %ecx
-; NOCMOV-NEXT:    movl %ecx, %eax
+; NOCMOV-NEXT:    addl $100, %eax
 ; NOCMOV-NEXT:  .LBB0_2:
 ; NOCMOV-NEXT:    retl
   %o = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
@@ -95,34 +88,27 @@ define i32 @sadd_add_load(i32 %x, i32 %y, i32* %pz) nounwind {
 define i32 @uadd_add_imm(i32 %x, i32 %y) {
 ; X64-LABEL: uadd_add_imm:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    leal (%rdi,%rsi), %eax
-; X64-NEXT:    addl $100, %eax
 ; X64-NEXT:    addl %esi, %edi
+; X64-NEXT:    leal 100(%rdi), %eax
 ; X64-NEXT:    cmovael %edi, %eax
 ; X64-NEXT:    retq
 ;
 ; CMOV-LABEL: uadd_add_imm:
 ; CMOV:       # %bb.0:
-; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CMOV-NEXT:    leal (%eax,%ecx), %edx
-; CMOV-NEXT:    addl $100, %edx
-; CMOV-NEXT:    addl %ecx, %eax
-; CMOV-NEXT:    cmovbl %edx, %eax
+; CMOV-NEXT:    addl {{[0-9]+}}(%esp), %ecx
+; CMOV-NEXT:    leal 100(%ecx), %eax
+; CMOV-NEXT:    cmovael %ecx, %eax
 ; CMOV-NEXT:    retl
 ;
 ; NOCMOV-LABEL: uadd_add_imm:
 ; NOCMOV:       # %bb.0:
 ; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; NOCMOV-NEXT:    leal (%eax,%edx), %ecx
-; NOCMOV-NEXT:    addl %edx, %eax
+; NOCMOV-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; NOCMOV-NEXT:    jae .LBB2_2
 ; NOCMOV-NEXT:  # %bb.1:
-; NOCMOV-NEXT:    addl $100, %ecx
-; NOCMOV-NEXT:    movl %ecx, %eax
+; NOCMOV-NEXT:    addl $100, %eax
 ; NOCMOV-NEXT:  .LBB2_2:
 ; NOCMOV-NEXT:    retl
   %o = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
@@ -182,35 +168,27 @@ define i32 @uadd_add_load(i32 %x, i32 %y, i32* %pz) nounwind {
 define i32 @ssub_add_imm(i32 %x, i32 %y) {
 ; X64-LABEL: ssub_add_imm:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    subl %esi, %eax
-; X64-NEXT:    addl $100, %eax
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    leal 100(%rdi), %eax
 ; X64-NEXT:    cmovnol %edi, %eax
 ; X64-NEXT:    retq
 ;
 ; CMOV-LABEL: ssub_add_imm:
 ; CMOV:       # %bb.0:
-; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CMOV-NEXT:    movl %eax, %edx
-; CMOV-NEXT:    subl %ecx, %edx
-; CMOV-NEXT:    addl $100, %edx
-; CMOV-NEXT:    subl %ecx, %eax
-; CMOV-NEXT:    cmovol %edx, %eax
+; CMOV-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; CMOV-NEXT:    leal 100(%ecx), %eax
+; CMOV-NEXT:    cmovnol %ecx, %eax
 ; CMOV-NEXT:    retl
 ;
 ; NOCMOV-LABEL: ssub_add_imm:
 ; NOCMOV:       # %bb.0:
 ; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; NOCMOV-NEXT:    movl %eax, %ecx
-; NOCMOV-NEXT:    subl %edx, %ecx
-; NOCMOV-NEXT:    subl %edx, %eax
+; NOCMOV-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; NOCMOV-NEXT:    jno .LBB4_2
 ; NOCMOV-NEXT:  # %bb.1:
-; NOCMOV-NEXT:    addl $100, %ecx
-; NOCMOV-NEXT:    movl %ecx, %eax
+; NOCMOV-NEXT:    addl $100, %eax
 ; NOCMOV-NEXT:  .LBB4_2:
 ; NOCMOV-NEXT:    retl
   %o = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 %y)
@@ -271,35 +249,27 @@ define i32 @ssub_add_load(i32 %x, i32 %y, i32* %pz) nounwind {
 define i32 @usub_add_imm(i32 %x, i32 %y) {
 ; X64-LABEL: usub_add_imm:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    subl %esi, %eax
-; X64-NEXT:    addl $100, %eax
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    leal 100(%rdi), %eax
 ; X64-NEXT:    cmovael %edi, %eax
 ; X64-NEXT:    retq
 ;
 ; CMOV-LABEL: usub_add_imm:
 ; CMOV:       # %bb.0:
-; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CMOV-NEXT:    movl %eax, %edx
-; CMOV-NEXT:    subl %ecx, %edx
-; CMOV-NEXT:    addl $100, %edx
-; CMOV-NEXT:    subl %ecx, %eax
-; CMOV-NEXT:    cmovbl %edx, %eax
+; CMOV-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; CMOV-NEXT:    leal 100(%ecx), %eax
+; CMOV-NEXT:    cmovael %ecx, %eax
 ; CMOV-NEXT:    retl
 ;
 ; NOCMOV-LABEL: usub_add_imm:
 ; NOCMOV:       # %bb.0:
 ; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; NOCMOV-NEXT:    movl %eax, %ecx
-; NOCMOV-NEXT:    subl %edx, %ecx
-; NOCMOV-NEXT:    subl %edx, %eax
+; NOCMOV-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; NOCMOV-NEXT:    jae .LBB6_2
 ; NOCMOV-NEXT:  # %bb.1:
-; NOCMOV-NEXT:    addl $100, %ecx
-; NOCMOV-NEXT:    movl %ecx, %eax
+; NOCMOV-NEXT:    addl $100, %eax
 ; NOCMOV-NEXT:  .LBB6_2:
 ; NOCMOV-NEXT:    retl
   %o = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 %y)