[llvm-branch-commits] [llvm-branch] r366704 - Merging r366431:

Hans Wennborg via llvm-branch-commits <llvm-branch-commits at lists.llvm.org>
Mon Jul 22 10:32:33 PDT 2019


Author: hans
Date: Mon Jul 22 10:32:32 2019
New Revision: 366704

URL: http://llvm.org/viewvc/llvm-project?rev=366704&view=rev
Log:
Merging r366431:
------------------------------------------------------------------------
r366431 | spatel | 2019-07-18 14:48:01 +0200 (Thu, 18 Jul 2019) | 13 lines

[x86] try harder to form LEA from ADD to avoid flag conflicts (PR40483)

LEA doesn't affect flags, so use it more liberally to replace an ADD when
we know that the ADD's operands themselves set flags that are still used.

In the motivating example from PR40483:
https://bugs.llvm.org/show_bug.cgi?id=40483
...this lets us avoid duplicating a math op just to avoid a flag conflict
(a rough source-level sketch of the pattern follows this log).

As mentioned in the TODO comments, this heuristic can be extended to
fire more often if that leads to more improvements.

Differential Revision: https://reviews.llvm.org/D64707
------------------------------------------------------------------------
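
For context, the motivating case (PR40483_sub6 in the test diff below) roughly
reduces to a source-level pattern like the following sketch. The function name
is illustrative, and whether a particular compiler release folds the comparison
into the flag-setting SUB is an assumption made for illustration, not something
the commit itself guarantees:

// Rough sketch of the PR40483 shape: the SUB's numeric result is stored and
// also doubled, while its borrow (CF) guards the select. If 'diff + diff' is
// selected as an ADD, it clobbers EFLAGS and the SUB must be duplicated to
// recompute CF; selecting an LEA leaves EFLAGS untouched, so one SUB suffices.
unsigned pr40483_like(unsigned *p, unsigned y) { // illustrative name
  unsigned a = *p;
  unsigned diff = a - y;          // SUB: sets CF when a < y (unsigned borrow)
  *p = diff;
  return a < y ? diff + diff : 0; // 'a < y' wants to reuse the SUB's CF
}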

Modified:
    llvm/branches/release_90/   (props changed)
    llvm/branches/release_90/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/branches/release_90/test/CodeGen/X86/combine-sbb.ll

Propchange: llvm/branches/release_90/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Mon Jul 22 10:32:32 2019
@@ -1,3 +1,3 @@
 /llvm/branches/Apple/Pertwee:110850,110961
 /llvm/branches/type-system-rewrite:133420-134817
-/llvm/trunk:155241
+/llvm/trunk:155241,366431

Modified: llvm/branches/release_90/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_90/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=366704&r1=366703&r2=366704&view=diff
==============================================================================
--- llvm/branches/release_90/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/branches/release_90/lib/Target/X86/X86ISelDAGToDAG.cpp Mon Jul 22 10:32:32 2019
@@ -2464,6 +2464,37 @@ bool X86DAGToDAGISel::selectLEAAddr(SDVa
       Complexity += 2;
   }
 
+  // Heuristic: try harder to form an LEA from ADD if the operands set flags.
+  // Unlike ADD, LEA does not affect flags, so we will be less likely to require
+  // duplicating flag-producing instructions later in the pipeline.
+  if (N.getOpcode() == ISD::ADD) {
+    auto isMathWithFlags = [](SDValue V) {
+      switch (V.getOpcode()) {
+      case X86ISD::ADD:
+      case X86ISD::SUB:
+      case X86ISD::ADC:
+      case X86ISD::SBB:
+      /* TODO: These opcodes can be added safely, but we may want to justify
+               their inclusion for different reasons (better for reg-alloc).
+      case X86ISD::SMUL:
+      case X86ISD::UMUL:
+      case X86ISD::OR:
+      case X86ISD::XOR:
+      case X86ISD::AND:
+      */
+        // Value 1 is the flag output of the node - verify it's not dead.
+        return !SDValue(V.getNode(), 1).use_empty();
+      default:
+        return false;
+      }
+    };
+    // TODO: This could be an 'or' rather than 'and' to make the transform more
+    //       likely to happen. We might want to factor in whether there's a
+    //       load folding opportunity for the math op that disappears with LEA.
+    if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1)))
+      Complexity++;
+  }
+
   if (AM.Disp)
     Complexity++;
 

Modified: llvm/branches/release_90/test/CodeGen/X86/combine-sbb.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_90/test/CodeGen/X86/combine-sbb.ll?rev=366704&r1=366703&r2=366704&view=diff
==============================================================================
--- llvm/branches/release_90/test/CodeGen/X86/combine-sbb.ll (original)
+++ llvm/branches/release_90/test/CodeGen/X86/combine-sbb.ll Mon Jul 22 10:32:32 2019
@@ -309,35 +309,25 @@ define i32 @PR40483_sub5(i32*, i32) noun
 define i32 @PR40483_sub6(i32*, i32) nounwind {
 ; X86-LABEL: PR40483_sub6:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl (%edx), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    movl %esi, %ecx
-; X86-NEXT:    subl %edi, %ecx
+; X86-NEXT:    movl (%edx), %ecx
 ; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    subl %edi, %esi
-; X86-NEXT:    movl %esi, (%edx)
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, (%edx)
 ; X86-NEXT:    jae .LBB8_2
 ; X86-NEXT:  # %bb.1:
-; X86-NEXT:    addl %ecx, %ecx
-; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    leal (%ecx,%ecx), %eax
 ; X86-NEXT:  .LBB8_2:
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: PR40483_sub6:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl (%rdi), %ecx
-; X64-NEXT:    movl %ecx, %edx
-; X64-NEXT:    subl %esi, %edx
-; X64-NEXT:    addl %edx, %edx
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    subl %esi, %ecx
-; X64-NEXT:    movl %ecx, (%rdi)
-; X64-NEXT:    cmovbl %edx, %eax
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    movl %eax, (%rdi)
+; X64-NEXT:    leal (%rax,%rax), %eax
+; X64-NEXT:    cmovael %ecx, %eax
 ; X64-NEXT:    retq
   %3 = load i32, i32* %0, align 8
   %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)



