[llvm] r329121 - [InstCombine] allow more fmul folds with 'reassoc'
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 3 15:19:19 PDT 2018
Author: spatel
Date: Tue Apr 3 15:19:19 2018
New Revision: 329121
URL: http://llvm.org/viewvc/llvm-project?rev=329121&view=rev
Log:
[InstCombine] allow more fmul folds with 'reassoc'
The tests marked with 'FIXME' require loosening the check
in SimplifyAssociativeOrCommutative() to optimize completely;
that's still checking isFast() in Instruction::isAssociative().
Modified:
llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
llvm/trunk/test/Transforms/InstCombine/fmul.ll
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp?rev=329121&r1=329120&r2=329121&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp Tue Apr 3 15:19:19 2018
@@ -491,43 +491,74 @@ Instruction *InstCombiner::visitFMul(Bin
if (Value *V = SimplifySelectsFeedingBinaryOp(I, Op0, Op1))
return replaceInstUsesWith(I, V);
- // Reassociate constant RHS with another constant to form constant expression.
- // FIXME: These folds do not require all FMF.
- if (I.isFast() && match(Op1, m_Constant(C)) && C->isFiniteNonZeroFP()) {
- Constant *C1;
- if (match(Op0, m_OneUse(m_FDiv(m_Constant(C1), m_Value(X))))) {
- // (C1 / X) * C --> (C * C1) / X
- Constant *CC1 = ConstantExpr::getFMul(C, C1);
- if (CC1->isNormalFP())
- return BinaryOperator::CreateFDivFMF(CC1, X, &I);
- }
- if (match(Op0, m_FDiv(m_Value(X), m_Constant(C1)))) {
- // (X / C1) * C --> X * (C / C1)
- Constant *CDivC1 = ConstantExpr::getFDiv(C, C1);
- if (CDivC1->isNormalFP())
- return BinaryOperator::CreateFMulFMF(X, CDivC1, &I);
-
- // If the constant was a denormal, try reassociating differently.
- // (X / C1) * C --> X / (C1 / C)
- Constant *C1DivC = ConstantExpr::getFDiv(C1, C);
- if (Op0->hasOneUse() && C1DivC->isNormalFP())
- return BinaryOperator::CreateFDivFMF(X, C1DivC, &I);
- }
-
- // We do not need to match 'fadd C, X' and 'fsub X, C' because they are
- // canonicalized to 'fadd X, C'. Distributing the multiply may allow further
- // folds and (X * C) + C2 is 'fma'.
- if (match(Op0, m_OneUse(m_FAdd(m_Value(X), m_Constant(C1))))) {
- // (X + C1) * C --> (X * C) + (C * C1)
- Constant *CC1 = ConstantExpr::getFMul(C, C1);
- Value *XC = Builder.CreateFMulFMF(X, C, &I);
- return BinaryOperator::CreateFAddFMF(XC, CC1, &I);
- }
- if (match(Op0, m_OneUse(m_FSub(m_Constant(C1), m_Value(X))))) {
- // (C1 - X) * C --> (C * C1) - (X * C)
- Constant *CC1 = ConstantExpr::getFMul(C, C1);
- Value *XC = Builder.CreateFMulFMF(X, C, &I);
- return BinaryOperator::CreateFSubFMF(CC1, XC, &I);
+ if (I.hasAllowReassoc()) {
+ // Reassociate constant RHS with another constant to form constant
+ // expression.
+ if (match(Op1, m_Constant(C)) && C->isFiniteNonZeroFP()) {
+ Constant *C1;
+ if (match(Op0, m_OneUse(m_FDiv(m_Constant(C1), m_Value(X))))) {
+ // (C1 / X) * C --> (C * C1) / X
+ Constant *CC1 = ConstantExpr::getFMul(C, C1);
+ if (CC1->isNormalFP())
+ return BinaryOperator::CreateFDivFMF(CC1, X, &I);
+ }
+ if (match(Op0, m_FDiv(m_Value(X), m_Constant(C1)))) {
+ // (X / C1) * C --> X * (C / C1)
+ Constant *CDivC1 = ConstantExpr::getFDiv(C, C1);
+ if (CDivC1->isNormalFP())
+ return BinaryOperator::CreateFMulFMF(X, CDivC1, &I);
+
+ // If the constant was a denormal, try reassociating differently.
+ // (X / C1) * C --> X / (C1 / C)
+ Constant *C1DivC = ConstantExpr::getFDiv(C1, C);
+ if (Op0->hasOneUse() && C1DivC->isNormalFP())
+ return BinaryOperator::CreateFDivFMF(X, C1DivC, &I);
+ }
+
+ // We do not need to match 'fadd C, X' and 'fsub X, C' because they are
+ // canonicalized to 'fadd X, C'. Distributing the multiply may allow
+ // further folds and (X * C) + C2 is 'fma'.
+ if (match(Op0, m_OneUse(m_FAdd(m_Value(X), m_Constant(C1))))) {
+ // (X + C1) * C --> (X * C) + (C * C1)
+ Constant *CC1 = ConstantExpr::getFMul(C, C1);
+ Value *XC = Builder.CreateFMulFMF(X, C, &I);
+ return BinaryOperator::CreateFAddFMF(XC, CC1, &I);
+ }
+ if (match(Op0, m_OneUse(m_FSub(m_Constant(C1), m_Value(X))))) {
+ // (C1 - X) * C --> (C * C1) - (X * C)
+ Constant *CC1 = ConstantExpr::getFMul(C, C1);
+ Value *XC = Builder.CreateFMulFMF(X, C, &I);
+ return BinaryOperator::CreateFSubFMF(CC1, XC, &I);
+ }
+ }
+
+ // sqrt(X) * sqrt(Y) -> sqrt(X * Y)
+ // nnan disallows the possibility of returning a number if both operands are
+ // negative (in that case, we should return NaN).
+ if (I.hasNoNaNs() &&
+ match(Op0, m_OneUse(m_Intrinsic<Intrinsic::sqrt>(m_Value(X)))) &&
+ match(Op1, m_OneUse(m_Intrinsic<Intrinsic::sqrt>(m_Value(Y))))) {
+ Value *XY = Builder.CreateFMulFMF(X, Y, &I);
+ Value *Sqrt = Builder.CreateIntrinsic(Intrinsic::sqrt, { XY }, &I);
+ return replaceInstUsesWith(I, Sqrt);
+ }
+
+ // (X*Y) * X => (X*X) * Y where Y != X
+ // The purpose is two-fold:
+ // 1) to form a power expression (of X).
+ // 2) potentially shorten the critical path: After transformation, the
+ // latency of the instruction Y is amortized by the expression of X*X,
+ // and therefore Y is in a "less critical" position compared to what it
+ // was before the transformation.
+ if (match(Op0, m_OneUse(m_c_FMul(m_Specific(Op1), m_Value(Y)))) &&
+ Op1 != Y) {
+ Value *XX = Builder.CreateFMulFMF(Op1, Op1, &I);
+ return BinaryOperator::CreateFMulFMF(XX, Y, &I);
+ }
+ if (match(Op1, m_OneUse(m_c_FMul(m_Specific(Op0), m_Value(Y)))) &&
+ Op0 != Y) {
+ Value *XX = Builder.CreateFMulFMF(Op0, Op0, &I);
+ return BinaryOperator::CreateFMulFMF(XX, Y, &I);
}
}
@@ -552,37 +583,6 @@ Instruction *InstCombiner::visitFMul(Bin
}
}
- // sqrt(X) * sqrt(Y) -> sqrt(X * Y)
- // nnan disallows the possibility of returning a number if both operands are
- // negative (in that case, we should return NaN).
- if (I.hasAllowReassoc() && I.hasNoNaNs() &&
- match(Op0, m_OneUse(m_Intrinsic<Intrinsic::sqrt>(m_Value(X)))) &&
- match(Op1, m_OneUse(m_Intrinsic<Intrinsic::sqrt>(m_Value(Y))))) {
- Value *XY = Builder.CreateFMulFMF(X, Y, &I);
- Value *Sqrt = Builder.CreateIntrinsic(Intrinsic::sqrt, { XY }, &I);
- return replaceInstUsesWith(I, Sqrt);
- }
-
- // (X*Y) * X => (X*X) * Y where Y != X
- // The purpose is two-fold:
- // 1) to form a power expression (of X).
- // 2) potentially shorten the critical path: After transformation, the
- // latency of the instruction Y is amortized by the expression of X*X,
- // and therefore Y is in a "less critical" position compared to what it
- // was before the transformation.
- if (I.hasAllowReassoc()) {
- if (match(Op0, m_OneUse(m_c_FMul(m_Specific(Op1), m_Value(Y)))) &&
- Op1 != Y) {
- Value *XX = Builder.CreateFMulFMF(Op1, Op1, &I);
- return BinaryOperator::CreateFMulFMF(XX, Y, &I);
- }
- if (match(Op1, m_OneUse(m_c_FMul(m_Specific(Op0), m_Value(Y)))) &&
- Op0 != Y) {
- Value *XX = Builder.CreateFMulFMF(Op0, Op0, &I);
- return BinaryOperator::CreateFMulFMF(XX, Y, &I);
- }
- }
-
return Changed ? &I : nullptr;
}
Modified: llvm/trunk/test/Transforms/InstCombine/fmul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/fmul.ll?rev=329121&r1=329120&r2=329121&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/fmul.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/fmul.ll Tue Apr 3 15:19:19 2018
@@ -336,11 +336,11 @@ define float @log2half_commute(float %x1
define float @fdiv_constant_numerator_fmul(float %x) {
; CHECK-LABEL: @fdiv_constant_numerator_fmul(
-; CHECK-NEXT: [[T3:%.*]] = fdiv fast float 1.200000e+07, [[X:%.*]]
+; CHECK-NEXT: [[T3:%.*]] = fdiv reassoc float 1.200000e+07, [[X:%.*]]
; CHECK-NEXT: ret float [[T3]]
;
%t1 = fdiv float 2.0e+3, %x
- %t3 = fmul fast float %t1, 6.0e+3
+ %t3 = fmul reassoc float %t1, 6.0e+3
ret float %t3
}
@@ -365,21 +365,21 @@ define float @fdiv_constant_numerator_fm
define float @fdiv_constant_denominator_fmul(float %x) {
; CHECK-LABEL: @fdiv_constant_denominator_fmul(
-; CHECK-NEXT: [[T3:%.*]] = fmul fast float [[X:%.*]], 3.000000e+00
+; CHECK-NEXT: [[T3:%.*]] = fmul reassoc float [[X:%.*]], 3.000000e+00
; CHECK-NEXT: ret float [[T3]]
;
%t1 = fdiv float %x, 2.0e+3
- %t3 = fmul fast float %t1, 6.0e+3
+ %t3 = fmul reassoc float %t1, 6.0e+3
ret float %t3
}
define <4 x float> @fdiv_constant_denominator_fmul_vec(<4 x float> %x) {
; CHECK-LABEL: @fdiv_constant_denominator_fmul_vec(
-; CHECK-NEXT: [[T3:%.*]] = fmul fast <4 x float> [[X:%.*]], <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: [[T3:%.*]] = fmul reassoc <4 x float> [[X:%.*]], <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT: ret <4 x float> [[T3]]
;
%t1 = fdiv <4 x float> %x, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
- %t3 = fmul fast <4 x float> %t1, <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3>
+ %t3 = fmul reassoc <4 x float> %t1, <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3>
ret <4 x float> %t3
}
@@ -387,12 +387,12 @@ define <4 x float> @fdiv_constant_denomi
define <4 x float> @fdiv_constant_denominator_fmul_vec_constexpr(<4 x float> %x) {
; CHECK-LABEL: @fdiv_constant_denominator_fmul_vec_constexpr(
-; CHECK-NEXT: [[T3:%.*]] = fmul fast <4 x float> [[X:%.*]], <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: [[T3:%.*]] = fmul reassoc <4 x float> [[X:%.*]], <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT: ret <4 x float> [[T3]]
;
%constExprMul = bitcast i128 trunc (i160 bitcast (<5 x float> <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3, float undef> to i160) to i128) to <4 x float>
%t1 = fdiv <4 x float> %x, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
- %t3 = fmul fast <4 x float> %t1, %constExprMul
+ %t3 = fmul reassoc <4 x float> %t1, %constExprMul
ret <4 x float> %t3
}
@@ -416,11 +416,11 @@ define float @fdiv_constant_denominator_
define float @fdiv_constant_denominator_fmul_denorm_try_harder(float %x) {
; CHECK-LABEL: @fdiv_constant_denominator_fmul_denorm_try_harder(
-; CHECK-NEXT: [[T3:%.*]] = fdiv fast float [[X:%.*]], 0x47E8000000000000
+; CHECK-NEXT: [[T3:%.*]] = fdiv reassoc float [[X:%.*]], 0x47E8000000000000
; CHECK-NEXT: ret float [[T3]]
;
%t1 = fdiv float %x, 3.0
- %t3 = fmul fast float %t1, 0x3810000000000000
+ %t3 = fmul reassoc float %t1, 0x3810000000000000
ret float %t3
}
@@ -443,12 +443,12 @@ define float @fdiv_constant_denominator_
define float @fmul_fadd_distribute(float %x) {
; CHECK-LABEL: @fmul_fadd_distribute(
-; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], 3.000000e+00
-; CHECK-NEXT: [[T3:%.*]] = fadd fast float [[TMP1]], 6.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], 3.000000e+00
+; CHECK-NEXT: [[T3:%.*]] = fadd reassoc float [[TMP1]], 6.000000e+00
; CHECK-NEXT: ret float [[T3]]
;
%t2 = fadd float %x, 2.0
- %t3 = fmul fast float %t2, 3.0
+ %t3 = fmul reassoc float %t2, 3.0
ret float %t3
}
@@ -456,12 +456,12 @@ define float @fmul_fadd_distribute(float
define float @fmul_fsub_distribute1(float %x) {
; CHECK-LABEL: @fmul_fsub_distribute1(
-; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], 3.000000e+00
-; CHECK-NEXT: [[T3:%.*]] = fadd fast float [[TMP1]], -6.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], 3.000000e+00
+; CHECK-NEXT: [[T3:%.*]] = fadd reassoc float [[TMP1]], -6.000000e+00
; CHECK-NEXT: ret float [[T3]]
;
%t2 = fsub float %x, 2.0
- %t3 = fmul fast float %t2, 3.0
+ %t3 = fmul reassoc float %t2, 3.0
ret float %t3
}
@@ -469,15 +469,16 @@ define float @fmul_fsub_distribute1(floa
define float @fmul_fsub_distribute2(float %x) {
; CHECK-LABEL: @fmul_fsub_distribute2(
-; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], 3.000000e+00
-; CHECK-NEXT: [[T3:%.*]] = fsub fast float 6.000000e+00, [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], 3.000000e+00
+; CHECK-NEXT: [[T3:%.*]] = fsub reassoc float 6.000000e+00, [[TMP1]]
; CHECK-NEXT: ret float [[T3]]
;
%t2 = fsub float 2.0, %x
- %t3 = fmul fast float %t2, 3.0
+ %t3 = fmul reassoc float %t2, 3.0
ret float %t3
}
+; FIXME: This should only need 'reassoc'.
; ((X*C1) + C2) * C3 => (X * (C1*C3)) + (C2*C3)
define float @fmul_fadd_fmul_distribute(float %x) {
@@ -514,13 +515,13 @@ define float @fmul_fadd_distribute_extra
define double @fmul_fadd_fdiv_distribute2(double %x) {
; CHECK-LABEL: @fmul_fadd_fdiv_distribute2(
-; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast double [[X:%.*]], 0x7FE8000000000000
-; CHECK-NEXT: [[T3:%.*]] = fadd fast double [[TMP1]], 0x34000000000000
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc double [[X:%.*]], 0x7FE8000000000000
+; CHECK-NEXT: [[T3:%.*]] = fadd reassoc double [[TMP1]], 0x34000000000000
; CHECK-NEXT: ret double [[T3]]
;
%t1 = fdiv double %x, 3.0
%t2 = fadd double %t1, 5.0
- %t3 = fmul fast double %t2, 0x10000000000000
+ %t3 = fmul reassoc double %t2, 0x10000000000000
ret double %t3
}
@@ -529,16 +530,17 @@ define double @fmul_fadd_fdiv_distribute
define double @fmul_fadd_fdiv_distribute3(double %x) {
; CHECK-LABEL: @fmul_fadd_fdiv_distribute3(
-; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast double [[X:%.*]], 0x7FE8000000000000
-; CHECK-NEXT: [[T3:%.*]] = fadd fast double [[TMP1]], 0x34000000000000
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc double [[X:%.*]], 0x7FE8000000000000
+; CHECK-NEXT: [[T3:%.*]] = fadd reassoc double [[TMP1]], 0x34000000000000
; CHECK-NEXT: ret double [[T3]]
;
%t1 = fdiv double %x, 3.0
%t2 = fadd double %t1, 5.0
- %t3 = fmul fast double %t2, 0x10000000000000
+ %t3 = fmul reassoc double %t2, 0x10000000000000
ret double %t3
}
+; FIXME: This should only need 'reassoc'.
; (C2 - (X*C1)) * C3 => (C2*C3) - (X * (C1*C3))
define float @fmul_fsub_fmul_distribute(float %x) {
@@ -568,6 +570,7 @@ define float @fmul_fsub_fmul_distribute_
ret float %t3
}
+; FIXME: This should only need 'reassoc'.
; ((X*C1) - C2) * C3 => (X * (C1*C3)) - C2*C3
define float @fmul_fsub_fmul_distribute2(float %x) {
More information about the llvm-commits
mailing list