[llvm] a0ce233 - [InstCombine] reassociate fsub+fadd with FMF to increase adds and throughput

Tue May 26 10:17:26 PDT 2020

Author: Sanjay Patel
Date: 2020-05-26T13:17:15-04:00
New Revision: a0ce2338a0838ccb04e10bd4f8e9ec9d7136e1d2

URL: https://github.com/llvm/llvm-project/commit/a0ce2338a0838ccb04e10bd4f8e9ec9d7136e1d2
DIFF: https://github.com/llvm/llvm-project/commit/a0ce2338a0838ccb04e10bd4f8e9ec9d7136e1d2.diff

LOG: [InstCombine] reassociate fsub+fadd with FMF to increase adds and throughput

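Rewrite ((X - Y) + Z) - W as (X + Z) - (Y + W) when the final fsub has the
required fast-math flags and the intermediate fsub and fadd have no other
uses. This creates more fadd instructions and shortens the dependency chain.

The -reassociate pass tends to transform this kind of pattern into
something that is worse for vectorization and codegen. See PR43953:
https://bugs.llvm.org/show_bug.cgi?id=43953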

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
    llvm/test/Transforms/InstCombine/fsub.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 85d6f47b205b..233e0c7b5de7 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2195,6 +2195,17 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
       return BinaryOperator::CreateFMulFMF(Op0, OneSubC, &I);
     }
 
+    // Reassociate fsub/fadd sequences to create more fadd instructions and
+    // reduce dependency chains:
+    // ((X - Y) + Z) - Op1 --> (X + Z) - (Y + Op1)
+    Value *Z;
+    if (match(Op0, m_OneUse(m_c_FAdd(m_OneUse(m_FSub(m_Value(X), m_Value(Y))),
+                                     m_Value(Z))))) {
+      Value *XZ = Builder.CreateFAddFMF(X, Z, &I);
+      Value *YW = Builder.CreateFAddFMF(Y, Op1, &I);
+      return BinaryOperator::CreateFSubFMF(XZ, YW, &I);
+    }
+
     if (Instruction *F = factorizeFAddFSub(I, Builder))
       return F;
 

diff  --git a/llvm/test/Transforms/InstCombine/fsub.ll b/llvm/test/Transforms/InstCombine/fsub.ll
index 68e49c21b3b4..a0f02dee2909 100644
--- a/llvm/test/Transforms/InstCombine/fsub.ll
+++ b/llvm/test/Transforms/InstCombine/fsub.ll
@@ -785,11 +785,13 @@ define float @fneg_fsub_constant(float %x) {
   ret float %sub
 }
 
+; ((w-x) + y) - z --> (w+y) - (x+z)
+
 define float @fsub_fadd_fsub_reassoc(float %w, float %x, float %y, float %z) {
 ; CHECK-LABEL: @fsub_fadd_fsub_reassoc(
-; CHECK-NEXT:    [[S1:%.*]] = fsub reassoc nsz float [[W:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = fadd reassoc nsz float [[S1]], [[Y:%.*]]
-; CHECK-NEXT:    [[S2:%.*]] = fsub reassoc nsz float [[A]], [[Z:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[W:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc nsz float [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[S2:%.*]] = fsub reassoc nsz float [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret float [[S2]]
 ;
   %s1 = fsub reassoc nsz float %w, %x
@@ -798,12 +800,14 @@ define float @fsub_fadd_fsub_reassoc(float %w, float %x, float %y, float %z) {
   ret float %s2
 }
 
+; FMF on the last op is enough to do the transform; vectors work too.
+
 define <2 x float> @fsub_fadd_fsub_reassoc_commute(<2 x float> %w, <2 x float> %x, <2 x float> %y, <2 x float> %z) {
 ; CHECK-LABEL: @fsub_fadd_fsub_reassoc_commute(
 ; CHECK-NEXT:    [[D:%.*]] = fdiv <2 x float> [[Y:%.*]], <float 4.200000e+01, float -4.200000e+01>
-; CHECK-NEXT:    [[S1:%.*]] = fsub <2 x float> [[W:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = fadd <2 x float> [[D]], [[S1]]
-; CHECK-NEXT:    [[S2:%.*]] = fsub fast <2 x float> [[A]], [[Z:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <2 x float> [[D]], [[W:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <2 x float> [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[S2:%.*]] = fsub fast <2 x float> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret <2 x float> [[S2]]
 ;
   %d = fdiv <2 x float> %y, <float 42.0, float -42.0> ; thwart complexity-based canonicalization
@@ -813,12 +817,14 @@ define <2 x float> @fsub_fadd_fsub_reassoc_commute(<2 x float> %w, <2 x float> %
   ret <2 x float> %s2
 }
 
+; (v-w) + (x-y) - z --> (v+x) - (w+y+z)
+
 define float @fsub_fadd_fsub_reassoc_twice(float %v, float %w, float %x, float %y, float %z) {
 ; CHECK-LABEL: @fsub_fadd_fsub_reassoc_twice(
-; CHECK-NEXT:    [[S1:%.*]] = fsub reassoc nsz float [[V:%.*]], [[W:%.*]]
-; CHECK-NEXT:    [[S2:%.*]] = fsub reassoc nsz float [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = fadd reassoc nsz float [[S1]], [[S2]]
-; CHECK-NEXT:    [[S3:%.*]] = fsub reassoc nsz float [[A]], [[Z:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[W:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc nsz float [[X:%.*]], [[V:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc nsz float [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[S3:%.*]] = fsub reassoc nsz float [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    ret float [[S3]]
 ;
   %s1 = fsub reassoc nsz float %v, %w
@@ -828,6 +834,8 @@ define float @fsub_fadd_fsub_reassoc_twice(float %v, float %w, float %x, float %
   ret float %s3
 }
 
+; negative test - the final fsub does not have all of the required FMF
+
 define float @fsub_fadd_fsub_not_reassoc(float %w, float %x, float %y, float %z) {
 ; CHECK-LABEL: @fsub_fadd_fsub_not_reassoc(
 ; CHECK-NEXT:    [[S1:%.*]] = fsub fast float [[W:%.*]], [[X:%.*]]
@@ -841,6 +849,8 @@ define float @fsub_fadd_fsub_not_reassoc(float %w, float %x, float %y, float %z)
   ret float %s2
 }
 
+; negative test - an intermediate value has an extra use
+
 define float @fsub_fadd_fsub_reassoc_use1(float %w, float %x, float %y, float %z) {
 ; CHECK-LABEL: @fsub_fadd_fsub_reassoc_use1(
 ; CHECK-NEXT:    [[S1:%.*]] = fsub fast float [[W:%.*]], [[X:%.*]]
@@ -856,6 +866,8 @@ define float @fsub_fadd_fsub_reassoc_use1(float %w, float %x, float %y, float %z
   ret float %s2
 }
 
+; negative test - an intermediate value has an extra use
+
 define float @fsub_fadd_fsub_reassoc_use2(float %w, float %x, float %y, float %z) {
 ; CHECK-LABEL: @fsub_fadd_fsub_reassoc_use2(
 ; CHECK-NEXT:    [[S1:%.*]] = fsub fast float [[W:%.*]], [[X:%.*]]