[llvm] 3180af4 - [InstCombine] reassociate fsub+fsub into fsub+fadd

Wed Jan 15 08:15:02 PST 2020

Author: Sanjay Patel
Date: 2020-01-15T11:14:13-05:00
New Revision: 3180af4362be22d416464f5f3700c456b2f124b9

URL: https://github.com/llvm/llvm-project/commit/3180af4362be22d416464f5f3700c456b2f124b9
DIFF: https://github.com/llvm/llvm-project/commit/3180af4362be22d416464f5f3700c456b2f124b9.diff

LOG: [InstCombine] reassociate fsub+fsub into fsub+fadd

As discussed in the motivating PR44509:
https://bugs.llvm.org/show_bug.cgi?id=44509

...we can end up with worse code using fast-math than without.
This is because the reassociate pass greedily transforms fsub
into fneg/fadd and apparently (based on the regression tests
seen here) expects instcombine to clean that up if it wasn't
profitable. But we were missing this fold:

(X - Y) - Z --> X - (Y + Z)

There's another, more specific case that I think we should
handle as shown in the "fake" fneg test (but missed with a real
fneg), but that's another patch. That may be tricky to get
right without conflicting with existing transforms for fneg.

Differential Revision: https://reviews.llvm.org/D72521

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
    llvm/test/Transforms/InstCombine/fsub.ll
    llvm/test/Transforms/Reassociate/fast-SubReassociate.ll
    llvm/test/Transforms/Reassociate/fast-basictest.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index ec976a971e3c..6eaae8f79a21 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2285,6 +2285,12 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
     // complex pattern matching and remove this from InstCombine.
     if (Value *V = FAddCombine(Builder).simplify(&I))
       return replaceInstUsesWith(I, V);
+
+    // (X - Y) - Op1 --> X - (Y + Op1)
+    if (match(Op0, m_OneUse(m_FSub(m_Value(X), m_Value(Y))))) {
+      Value *FAdd = Builder.CreateFAddFMF(Y, Op1, &I);
+      return BinaryOperator::CreateFSubFMF(X, FAdd, &I);
+    }
   }
 
   return nullptr;

diff  --git a/llvm/test/Transforms/InstCombine/fsub.ll b/llvm/test/Transforms/InstCombine/fsub.ll
index f2266dd17709..b9163b98ce8e 100644
--- a/llvm/test/Transforms/InstCombine/fsub.ll
+++ b/llvm/test/Transforms/InstCombine/fsub.ll
@@ -642,6 +642,8 @@ define float @fsub_fmul_fneg2_extra_use3(float %x, float %y, float %z) {
   ret float %r
 }
 
+; Negative test - can't reassociate without FMF.
+
 define float @fsub_fsub(float %x, float %y, float %z) {
 ; CHECK-LABEL: @fsub_fsub(
 ; CHECK-NEXT:    [[XY:%.*]] = fsub float [[X:%.*]], [[Y:%.*]]
@@ -653,6 +655,8 @@ define float @fsub_fsub(float %x, float %y, float %z) {
   ret float %xyz
 }
 
+; Negative test - can't reassociate without enough FMF.
+
 define float @fsub_fsub_nsz(float %x, float %y, float %z) {
 ; CHECK-LABEL: @fsub_fsub_nsz(
 ; CHECK-NEXT:    [[XY:%.*]] = fsub float [[X:%.*]], [[Y:%.*]]
@@ -664,6 +668,8 @@ define float @fsub_fsub_nsz(float %x, float %y, float %z) {
   ret float %xyz
 }
 
+; Negative test - can't reassociate without enough FMF.
+
 define float @fsub_fsub_reassoc(float %x, float %y, float %z) {
 ; CHECK-LABEL: @fsub_fsub_reassoc(
 ; CHECK-NEXT:    [[XY:%.*]] = fsub float [[X:%.*]], [[Y:%.*]]
@@ -677,8 +683,8 @@ define float @fsub_fsub_reassoc(float %x, float %y, float %z) {
 
 define float @fsub_fsub_nsz_reassoc(float %x, float %y, float %z) {
 ; CHECK-LABEL: @fsub_fsub_nsz_reassoc(
-; CHECK-NEXT:    [[XY:%.*]] = fsub float [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[XYZ:%.*]] = fsub reassoc nsz float [[XY]], [[Z:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[XYZ:%.*]] = fsub reassoc nsz float [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret float [[XYZ]]
 ;
   %xy = fsub float %x, %y
@@ -688,8 +694,8 @@ define float @fsub_fsub_nsz_reassoc(float %x, float %y, float %z) {
 
 define <2 x double> @fsub_fsub_fast_vec(<2 x double> %x, <2 x double> %y, <2 x double> %z) {
 ; CHECK-LABEL: @fsub_fsub_fast_vec(
-; CHECK-NEXT:    [[XY:%.*]] = fsub fast <2 x double> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[XYZ:%.*]] = fsub fast <2 x double> [[XY]], [[Z:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <2 x double> [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[XYZ:%.*]] = fsub fast <2 x double> [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x double> [[XYZ]]
 ;
   %xy = fsub fast <2 x double> %x, %y
@@ -697,6 +703,8 @@ define <2 x double> @fsub_fsub_fast_vec(<2 x double> %x, <2 x double> %y, <2 x d
   ret <2 x double> %xyz
 }
 
+; Negative test - don't reassociate and increase instructions.
+
 define float @fsub_fsub_nsz_reassoc_extra_use(float %x, float %y, float %z) {
 ; CHECK-LABEL: @fsub_fsub_nsz_reassoc_extra_use(
 ; CHECK-NEXT:    [[XY:%.*]] = fsub float [[X:%.*]], [[Y:%.*]]
@@ -734,8 +742,8 @@ define float @fneg_fsub_nsz(float %x, float %y) {
 
 define float @fake_fneg_fsub_fast(float %x, float %y) {
 ; CHECK-LABEL: @fake_fneg_fsub_fast(
-; CHECK-NEXT:    [[NEGX:%.*]] = fsub float -0.000000e+00, [[X:%.*]]
-; CHECK-NEXT:    [[SUB:%.*]] = fsub fast float [[NEGX]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[SUB:%.*]] = fsub fast float -0.000000e+00, [[TMP1]]
 ; CHECK-NEXT:    ret float [[SUB]]
 ;
   %negx = fsub float -0.0, %x

diff  --git a/llvm/test/Transforms/Reassociate/fast-SubReassociate.ll b/llvm/test/Transforms/Reassociate/fast-SubReassociate.ll
index 35c8dd4162a9..7f9d2f1ce366 100644
--- a/llvm/test/Transforms/Reassociate/fast-SubReassociate.ll
+++ b/llvm/test/Transforms/Reassociate/fast-SubReassociate.ll
@@ -79,16 +79,10 @@ define float @test3(float %A, float %B, float %C, float %D) {
 ; With sub reassociation, constant folding can eliminate the two 12 constants.
 
 define float @test4(float %A, float %B, float %C, float %D) {
-; FIXME: InstCombine should be able to get us to the following:
-; %sum = fadd fast float %B, %A
-; %sum1 = fadd fast float %sum, %C
-; %Q = fsub fast float %D, %sum1
-; ret i32 %Q
 ; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[B_NEG:%.*]] = fsub fast float -0.000000e+00, [[B:%.*]]
-; CHECK-NEXT:    [[O_NEG:%.*]] = fsub fast float [[B_NEG]], [[A:%.*]]
-; CHECK-NEXT:    [[P:%.*]] = fsub fast float [[O_NEG]], [[C:%.*]]
-; CHECK-NEXT:    [[Q:%.*]] = fadd fast float [[P]], [[D:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast float [[TMP1]], [[C:%.*]]
+; CHECK-NEXT:    [[Q:%.*]] = fsub fast float [[D:%.*]], [[TMP2]]
 ; CHECK-NEXT:    ret float [[Q]]
 ;
   %M = fadd fast float 1.200000e+01, %A

diff  --git a/llvm/test/Transforms/Reassociate/fast-basictest.ll b/llvm/test/Transforms/Reassociate/fast-basictest.ll
index 62b13f41b2bb..95f89102afe4 100644
--- a/llvm/test/Transforms/Reassociate/fast-basictest.ll
+++ b/llvm/test/Transforms/Reassociate/fast-basictest.ll
@@ -669,13 +669,9 @@ define float @test19_reassoc(float %A, float %B) {
 
 ; With sub reassociation, constant folding can eliminate the uses of %a.
 define float @test20(float %a, float %b, float %c) nounwind  {
-; FIXME: Should be able to generate the below, which may expose more
-;        opportunites for FAdd reassociation.
-; %sum = fadd fast float %c, %b
-; %t7 = fsub fast float 0, %sum
 ; CHECK-LABEL: @test20(
-; CHECK-NEXT:    [[B_NEG:%.*]] = fsub fast float -0.000000e+00, [[B:%.*]]
-; CHECK-NEXT:    [[T7:%.*]] = fsub fast float [[B_NEG]], [[C:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT:    [[T7:%.*]] = fsub fast float -0.000000e+00, [[TMP1]]
 ; CHECK-NEXT:    ret float [[T7]]
 ;
   %t3 = fsub fast float %a, %b