[llvm] r329121 - [InstCombine] allow more fmul folds with 'reassoc'

Tue Apr 3 15:19:19 PDT 2018

Author: spatel
Date: Tue Apr  3 15:19:19 2018
New Revision: 329121

URL: http://llvm.org/viewvc/llvm-project?rev=329121&view=rev
Log:
[InstCombine] allow more fmul folds with 'reassoc'

The tests marked with 'FIXME' cannot be optimized completely until the
check in SimplifyAssociativeOrCommutative() is loosened; that check still
requires isFast() because it goes through Instruction::isAssociative().

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
    llvm/trunk/test/Transforms/InstCombine/fmul.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp?rev=329121&r1=329120&r2=329121&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp Tue Apr  3 15:19:19 2018
@@ -491,43 +491,74 @@ Instruction *InstCombiner::visitFMul(Bin
   if (Value *V = SimplifySelectsFeedingBinaryOp(I, Op0, Op1))
     return replaceInstUsesWith(I, V);
 
-  // Reassociate constant RHS with another constant to form constant expression.
-  // FIXME: These folds do not require all FMF.
-  if (I.isFast() && match(Op1, m_Constant(C)) && C->isFiniteNonZeroFP()) {
-    Constant *C1;
-    if (match(Op0, m_OneUse(m_FDiv(m_Constant(C1), m_Value(X))))) {
-      // (C1 / X) * C --> (C * C1) / X
-      Constant *CC1 = ConstantExpr::getFMul(C, C1);
-      if (CC1->isNormalFP())
-        return BinaryOperator::CreateFDivFMF(CC1, X, &I);
-    }
-    if (match(Op0, m_FDiv(m_Value(X), m_Constant(C1)))) {
-      // (X / C1) * C --> X * (C / C1)
-      Constant *CDivC1 = ConstantExpr::getFDiv(C, C1);
-      if (CDivC1->isNormalFP())
-        return BinaryOperator::CreateFMulFMF(X, CDivC1, &I);
-
-      // If the constant was a denormal, try reassociating differently.
-      // (X / C1) * C --> X / (C1 / C)
-      Constant *C1DivC = ConstantExpr::getFDiv(C1, C);
-      if (Op0->hasOneUse() && C1DivC->isNormalFP())
-        return BinaryOperator::CreateFDivFMF(X, C1DivC, &I);
-    }
-
-    // We do not need to match 'fadd C, X' and 'fsub X, C' because they are
-    // canonicalized to 'fadd X, C'. Distributing the multiply may allow further
-    // folds and (X * C) + C2 is 'fma'.
-    if (match(Op0, m_OneUse(m_FAdd(m_Value(X), m_Constant(C1))))) {
-      // (X + C1) * C --> (X * C) + (C * C1)
-      Constant *CC1 = ConstantExpr::getFMul(C, C1);
-      Value *XC = Builder.CreateFMulFMF(X, C, &I);
-      return BinaryOperator::CreateFAddFMF(XC, CC1, &I);
-    }
-    if (match(Op0, m_OneUse(m_FSub(m_Constant(C1), m_Value(X))))) {
-      // (C1 - X) * C --> (C * C1) - (X * C)
-      Constant *CC1 = ConstantExpr::getFMul(C, C1);
-      Value *XC = Builder.CreateFMulFMF(X, C, &I);
-      return BinaryOperator::CreateFSubFMF(CC1, XC, &I);
+  if (I.hasAllowReassoc()) {
+    // Reassociate constant RHS with another constant to form constant
+    // expression.
+    if (match(Op1, m_Constant(C)) && C->isFiniteNonZeroFP()) {
+      Constant *C1;
+      if (match(Op0, m_OneUse(m_FDiv(m_Constant(C1), m_Value(X))))) {
+        // (C1 / X) * C --> (C * C1) / X
+        Constant *CC1 = ConstantExpr::getFMul(C, C1);
+        if (CC1->isNormalFP())
+          return BinaryOperator::CreateFDivFMF(CC1, X, &I);
+      }
+      if (match(Op0, m_FDiv(m_Value(X), m_Constant(C1)))) {
+        // (X / C1) * C --> X * (C / C1)
+        Constant *CDivC1 = ConstantExpr::getFDiv(C, C1);
+        if (CDivC1->isNormalFP())
+          return BinaryOperator::CreateFMulFMF(X, CDivC1, &I);
+
+        // If the constant was a denormal, try reassociating differently.
+        // (X / C1) * C --> X / (C1 / C)
+        Constant *C1DivC = ConstantExpr::getFDiv(C1, C);
+        if (Op0->hasOneUse() && C1DivC->isNormalFP())
+          return BinaryOperator::CreateFDivFMF(X, C1DivC, &I);
+      }
+
+      // We do not need to match 'fadd C, X' and 'fsub X, C' because they are
+      // canonicalized to 'fadd X, C'. Distributing the multiply may allow
+      // further folds and (X * C) + C2 is 'fma'.
+      if (match(Op0, m_OneUse(m_FAdd(m_Value(X), m_Constant(C1))))) {
+        // (X + C1) * C --> (X * C) + (C * C1)
+        Constant *CC1 = ConstantExpr::getFMul(C, C1);
+        Value *XC = Builder.CreateFMulFMF(X, C, &I);
+        return BinaryOperator::CreateFAddFMF(XC, CC1, &I);
+      }
+      if (match(Op0, m_OneUse(m_FSub(m_Constant(C1), m_Value(X))))) {
+        // (C1 - X) * C --> (C * C1) - (X * C)
+        Constant *CC1 = ConstantExpr::getFMul(C, C1);
+        Value *XC = Builder.CreateFMulFMF(X, C, &I);
+        return BinaryOperator::CreateFSubFMF(CC1, XC, &I);
+      }
+    }
+
+    // sqrt(X) * sqrt(Y) -> sqrt(X * Y)
+    // nnan disallows the possibility of returning a number if both operands are
+    // negative (in that case, we should return NaN).
+    if (I.hasNoNaNs() &&
+        match(Op0, m_OneUse(m_Intrinsic<Intrinsic::sqrt>(m_Value(X)))) &&
+        match(Op1, m_OneUse(m_Intrinsic<Intrinsic::sqrt>(m_Value(Y))))) {
+      Value *XY = Builder.CreateFMulFMF(X, Y, &I);
+      Value *Sqrt = Builder.CreateIntrinsic(Intrinsic::sqrt, { XY }, &I);
+      return replaceInstUsesWith(I, Sqrt);
+    }
+
+    // (X*Y) * X => (X*X) * Y where Y != X
+    //  The purpose is two-fold:
+    //   1) to form a power expression (of X).
+    //   2) potentially shorten the critical path: After transformation, the
+    //  latency of the instruction Y is amortized by the expression of X*X,
+    //  and therefore Y is in a "less critical" position compared to what it
+    //  was before the transformation.
+    if (match(Op0, m_OneUse(m_c_FMul(m_Specific(Op1), m_Value(Y)))) &&
+        Op1 != Y) {
+      Value *XX = Builder.CreateFMulFMF(Op1, Op1, &I);
+      return BinaryOperator::CreateFMulFMF(XX, Y, &I);
+    }
+    if (match(Op1, m_OneUse(m_c_FMul(m_Specific(Op0), m_Value(Y)))) &&
+        Op0 != Y) {
+      Value *XX = Builder.CreateFMulFMF(Op0, Op0, &I);
+      return BinaryOperator::CreateFMulFMF(XX, Y, &I);
     }
   }
 
@@ -552,37 +583,6 @@ Instruction *InstCombiner::visitFMul(Bin
     }
   }
 
-  // sqrt(X) * sqrt(Y) -> sqrt(X * Y)
-  // nnan disallows the possibility of returning a number if both operands are
-  // negative (in that case, we should return NaN).
-  if (I.hasAllowReassoc() && I.hasNoNaNs() &&
-      match(Op0, m_OneUse(m_Intrinsic<Intrinsic::sqrt>(m_Value(X)))) &&
-      match(Op1, m_OneUse(m_Intrinsic<Intrinsic::sqrt>(m_Value(Y))))) {
-    Value *XY = Builder.CreateFMulFMF(X, Y, &I);
-    Value *Sqrt = Builder.CreateIntrinsic(Intrinsic::sqrt, { XY }, &I);
-    return replaceInstUsesWith(I, Sqrt);
-  }
-
-  // (X*Y) * X => (X*X) * Y where Y != X
-  //  The purpose is two-fold:
-  //   1) to form a power expression (of X).
-  //   2) potentially shorten the critical path: After transformation, the
-  //  latency of the instruction Y is amortized by the expression of X*X,
-  //  and therefore Y is in a "less critical" position compared to what it
-  //  was before the transformation.
-  if (I.hasAllowReassoc()) {
-    if (match(Op0, m_OneUse(m_c_FMul(m_Specific(Op1), m_Value(Y)))) &&
-        Op1 != Y) {
-      Value *XX = Builder.CreateFMulFMF(Op1, Op1, &I);
-      return BinaryOperator::CreateFMulFMF(XX, Y, &I);
-    }
-    if (match(Op1, m_OneUse(m_c_FMul(m_Specific(Op0), m_Value(Y)))) &&
-        Op0 != Y) {
-      Value *XX = Builder.CreateFMulFMF(Op0, Op0, &I);
-      return BinaryOperator::CreateFMulFMF(XX, Y, &I);
-    }
-  }
-
   return Changed ? &I : nullptr;
 }
 

Modified: llvm/trunk/test/Transforms/InstCombine/fmul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/fmul.ll?rev=329121&r1=329120&r2=329121&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/fmul.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/fmul.ll Tue Apr  3 15:19:19 2018
@@ -336,11 +336,11 @@ define float @log2half_commute(float %x1
 
 define float @fdiv_constant_numerator_fmul(float %x) {
 ; CHECK-LABEL: @fdiv_constant_numerator_fmul(
-; CHECK-NEXT:    [[T3:%.*]] = fdiv fast float 1.200000e+07, [[X:%.*]]
+; CHECK-NEXT:    [[T3:%.*]] = fdiv reassoc float 1.200000e+07, [[X:%.*]]
 ; CHECK-NEXT:    ret float [[T3]]
 ;
   %t1 = fdiv float 2.0e+3, %x
-  %t3 = fmul fast float %t1, 6.0e+3
+  %t3 = fmul reassoc float %t1, 6.0e+3
   ret float %t3
 }
 
@@ -365,21 +365,21 @@ define float @fdiv_constant_numerator_fm
 
 define float @fdiv_constant_denominator_fmul(float %x) {
 ; CHECK-LABEL: @fdiv_constant_denominator_fmul(
-; CHECK-NEXT:    [[T3:%.*]] = fmul fast float [[X:%.*]], 3.000000e+00
+; CHECK-NEXT:    [[T3:%.*]] = fmul reassoc float [[X:%.*]], 3.000000e+00
 ; CHECK-NEXT:    ret float [[T3]]
 ;
   %t1 = fdiv float %x, 2.0e+3
-  %t3 = fmul fast float %t1, 6.0e+3
+  %t3 = fmul reassoc float %t1, 6.0e+3
   ret float %t3
 }
 
 define <4 x float> @fdiv_constant_denominator_fmul_vec(<4 x float> %x) {
 ; CHECK-LABEL: @fdiv_constant_denominator_fmul_vec(
-; CHECK-NEXT:    [[T3:%.*]] = fmul fast <4 x float> [[X:%.*]], <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[T3:%.*]] = fmul reassoc <4 x float> [[X:%.*]], <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 1.000000e+00>
 ; CHECK-NEXT:    ret <4 x float> [[T3]]
 ;
   %t1 = fdiv <4 x float> %x, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
-  %t3 = fmul fast <4 x float> %t1, <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3>
+  %t3 = fmul reassoc <4 x float> %t1, <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3>
   ret <4 x float> %t3
 }
 
@@ -387,12 +387,12 @@ define <4 x float> @fdiv_constant_denomi
 
 define <4 x float> @fdiv_constant_denominator_fmul_vec_constexpr(<4 x float> %x) {
 ; CHECK-LABEL: @fdiv_constant_denominator_fmul_vec_constexpr(
-; CHECK-NEXT:    [[T3:%.*]] = fmul fast <4 x float> [[X:%.*]], <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[T3:%.*]] = fmul reassoc <4 x float> [[X:%.*]], <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 1.000000e+00>
 ; CHECK-NEXT:    ret <4 x float> [[T3]]
 ;
   %constExprMul = bitcast i128 trunc (i160 bitcast (<5 x float> <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3, float undef> to i160) to i128) to <4 x float>
   %t1 = fdiv <4 x float> %x, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
-  %t3 = fmul fast <4 x float> %t1, %constExprMul
+  %t3 = fmul reassoc <4 x float> %t1, %constExprMul
   ret <4 x float> %t3
 }
 
@@ -416,11 +416,11 @@ define float @fdiv_constant_denominator_
 
 define float @fdiv_constant_denominator_fmul_denorm_try_harder(float %x) {
 ; CHECK-LABEL: @fdiv_constant_denominator_fmul_denorm_try_harder(
-; CHECK-NEXT:    [[T3:%.*]] = fdiv fast float [[X:%.*]], 0x47E8000000000000
+; CHECK-NEXT:    [[T3:%.*]] = fdiv reassoc float [[X:%.*]], 0x47E8000000000000
 ; CHECK-NEXT:    ret float [[T3]]
 ;
   %t1 = fdiv float %x, 3.0
-  %t3 = fmul fast float %t1, 0x3810000000000000
+  %t3 = fmul reassoc float %t1, 0x3810000000000000
   ret float %t3
 }
 
@@ -443,12 +443,12 @@ define float @fdiv_constant_denominator_
 
 define float @fmul_fadd_distribute(float %x) {
 ; CHECK-LABEL: @fmul_fadd_distribute(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X:%.*]], 3.000000e+00
-; CHECK-NEXT:    [[T3:%.*]] = fadd fast float [[TMP1]], 6.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], 3.000000e+00
+; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc float [[TMP1]], 6.000000e+00
 ; CHECK-NEXT:    ret float [[T3]]
 ;
   %t2 = fadd float %x, 2.0
-  %t3 = fmul fast float %t2, 3.0
+  %t3 = fmul reassoc float %t2, 3.0
   ret float %t3
 }
 
@@ -456,12 +456,12 @@ define float @fmul_fadd_distribute(float
 
 define float @fmul_fsub_distribute1(float %x) {
 ; CHECK-LABEL: @fmul_fsub_distribute1(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X:%.*]], 3.000000e+00
-; CHECK-NEXT:    [[T3:%.*]] = fadd fast float [[TMP1]], -6.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], 3.000000e+00
+; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc float [[TMP1]], -6.000000e+00
 ; CHECK-NEXT:    ret float [[T3]]
 ;
   %t2 = fsub float %x, 2.0
-  %t3 = fmul fast float %t2, 3.0
+  %t3 = fmul reassoc float %t2, 3.0
   ret float %t3
 }
 
@@ -469,15 +469,16 @@ define float @fmul_fsub_distribute1(floa
 
 define float @fmul_fsub_distribute2(float %x) {
 ; CHECK-LABEL: @fmul_fsub_distribute2(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X:%.*]], 3.000000e+00
-; CHECK-NEXT:    [[T3:%.*]] = fsub fast float 6.000000e+00, [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], 3.000000e+00
+; CHECK-NEXT:    [[T3:%.*]] = fsub reassoc float 6.000000e+00, [[TMP1]]
 ; CHECK-NEXT:    ret float [[T3]]
 ;
   %t2 = fsub float 2.0, %x
-  %t3 = fmul fast float %t2, 3.0
+  %t3 = fmul reassoc float %t2, 3.0
   ret float %t3
 }
 
+; FIXME: This should only need 'reassoc'.
 ; ((X*C1) + C2) * C3 => (X * (C1*C3)) + (C2*C3)
 
 define float @fmul_fadd_fmul_distribute(float %x) {
@@ -514,13 +515,13 @@ define float @fmul_fadd_distribute_extra
 
 define double @fmul_fadd_fdiv_distribute2(double %x) {
 ; CHECK-LABEL: @fmul_fadd_fdiv_distribute2(
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv fast double [[X:%.*]], 0x7FE8000000000000
-; CHECK-NEXT:    [[T3:%.*]] = fadd fast double [[TMP1]], 0x34000000000000
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc double [[X:%.*]], 0x7FE8000000000000
+; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc double [[TMP1]], 0x34000000000000
 ; CHECK-NEXT:    ret double [[T3]]
 ;
   %t1 = fdiv double %x, 3.0
   %t2 = fadd double %t1, 5.0
-  %t3 = fmul fast double %t2, 0x10000000000000
+  %t3 = fmul reassoc double %t2, 0x10000000000000
   ret double %t3
 }
 
@@ -529,16 +530,17 @@ define double @fmul_fadd_fdiv_distribute
 
 define double @fmul_fadd_fdiv_distribute3(double %x) {
 ; CHECK-LABEL: @fmul_fadd_fdiv_distribute3(
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv fast double [[X:%.*]], 0x7FE8000000000000
-; CHECK-NEXT:    [[T3:%.*]] = fadd fast double [[TMP1]], 0x34000000000000
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc double [[X:%.*]], 0x7FE8000000000000
+; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc double [[TMP1]], 0x34000000000000
 ; CHECK-NEXT:    ret double [[T3]]
 ;
   %t1 = fdiv double %x, 3.0
   %t2 = fadd double %t1, 5.0
-  %t3 = fmul fast double %t2, 0x10000000000000
+  %t3 = fmul reassoc double %t2, 0x10000000000000
   ret double %t3
 }
 
+; FIXME: This should only need 'reassoc'.
 ; (C2 - (X*C1)) * C3 => (C2*C3) - (X * (C1*C3))
 
 define float @fmul_fsub_fmul_distribute(float %x) {
@@ -568,6 +570,7 @@ define float @fmul_fsub_fmul_distribute_
   ret float %t3
 }
 
+; FIXME: This should only need 'reassoc'.
 ; ((X*C1) - C2) * C3 => (X * (C1*C3)) - C2*C3
 
 define float @fmul_fsub_fmul_distribute2(float %x) {