[llvm] r222510 - DAGCombiner: Allow the DAGCombiner to combine multiple FDIVs with the same divisor into FMULs by the reciprocal.
Hao Liu
Hao.Liu at arm.com
Thu Nov 20 22:39:58 PST 2014
Author: haoliu
Date: Fri Nov 21 00:39:58 2014
New Revision: 222510
URL: http://llvm.org/viewvc/llvm-project?rev=222510&view=rev
Log:
DAGCombiner: Allow the DAGCombiner to combine multiple FDIVs with the same divisor into FMULs by the reciprocal.
E.g., ( a / D; b / D ) -> ( recip = 1.0 / D; a * recip; b * recip)
A hook is added to allow the target to control whether it wants to perform such a combine.
Reviewed in http://reviews.llvm.org/D6334
Added:
llvm/trunk/test/CodeGen/AArch64/fdiv-combine.ll
Modified:
llvm/trunk/include/llvm/Target/TargetLowering.h
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=222510&r1=222509&r2=222510&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/Target/TargetLowering.h Fri Nov 21 00:39:58 2014
@@ -2652,6 +2652,12 @@ public:
return SDValue();
}
+ /// Indicate whether this target prefers to combine the given number of FDIVs
+ /// with the same divisor.
+ virtual bool combineRepeatedFPDivisors(unsigned NumUsers) const {
+ return false;
+ }
+
/// Hooks for building estimates in place of slower divisions and square
/// roots.
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=222510&r1=222509&r2=222510&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Nov 21 00:39:58 2014
@@ -7104,6 +7104,44 @@ SDValue DAGCombiner::visitFDIV(SDNode *N
}
}
+ // Combine multiple FDIVs with the same divisor into multiple FMULs by the
+ // reciprocal.
+ // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
+ // Notice that this is not always beneficial. One reason is different target
+ // may have different costs for FDIV and FMUL, so sometimes the cost of two
+ // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
+ // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
+ if (Options.UnsafeFPMath) {
+ // Skip if current node is a reciprocal.
+ if (N0CFP && N0CFP->isExactlyValue(1.0))
+ return SDValue();
+
+ SmallVector<SDNode *, 4> Users;
+ // Find all FDIV users of the same divisor.
+ for (SDNode::use_iterator UI = N1.getNode()->use_begin(),
+ UE = N1.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = UI.getUse().getUser();
+ if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1)
+ Users.push_back(User);
+ }
+
+ if (TLI.combineRepeatedFPDivisors(Users.size())) {
+ SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0
+ SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1);
+
+ // Dividend / Divisor -> Dividend * Reciprocal
+ for (auto I = Users.begin(), E = Users.end(); I != E; ++I) {
+ if ((*I)->getOperand(0) != FPOne) {
+ SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT,
+ (*I)->getOperand(0), Reciprocal);
+ DAG.ReplaceAllUsesWith(*I, NewNode.getNode());
+ }
+ }
+ return SDValue();
+ }
+ }
+
return SDValue();
}
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=222510&r1=222509&r2=222510&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Fri Nov 21 00:39:58 2014
@@ -8732,6 +8732,12 @@ bool AArch64TargetLowering::useLoadStack
return true;
}
+bool AArch64TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+ // Combine multiple FDIVs with the same divisor into multiple FMULs by the
+ // reciprocal if there are three or more FDIVs.
+ return NumUsers > 2;
+}
+
TargetLoweringBase::LegalizeTypeAction
AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
MVT SVT = VT.getSimpleVT();
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h?rev=222510&r1=222509&r2=222510&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h Fri Nov 21 00:39:58 2014
@@ -440,6 +440,7 @@ private:
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
std::vector<SDNode *> *Created) const override;
+ bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
ConstraintType
getConstraintType(const std::string &Constraint) const override;
Added: llvm/trunk/test/CodeGen/AArch64/fdiv-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fdiv-combine.ll?rev=222510&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fdiv-combine.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/fdiv-combine.ll Fri Nov 21 00:39:58 2014
@@ -0,0 +1,94 @@
+; RUN: llc -march=aarch64 < %s | FileCheck %s
+
+; Following test cases check:
+; a / D; b / D; c / D;
+; =>
+; recip = 1.0 / D; a * recip; b * recip; c * recip;
+define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
+; CHECK-LABEL: three_fdiv_float:
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fdiv
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmul
+ %div = fdiv float %a, %D
+ %div1 = fdiv float %b, %D
+ %div2 = fdiv float %c, %D
+ tail call void @foo_3f(float %div, float %div1, float %div2)
+ ret void
+}
+
+define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
+; CHECK-LABEL: three_fdiv_double:
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fdiv
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmul
+ %div = fdiv double %a, %D
+ %div1 = fdiv double %b, %D
+ %div2 = fdiv double %c, %D
+ tail call void @foo_3d(double %div, double %div1, double %div2)
+ ret void
+}
+
+define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
+; CHECK-LABEL: three_fdiv_4xfloat:
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fdiv
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmul
+ %div = fdiv <4 x float> %a, %D
+ %div1 = fdiv <4 x float> %b, %D
+ %div2 = fdiv <4 x float> %c, %D
+ tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
+ ret void
+}
+
+define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
+; CHECK-LABEL: three_fdiv_2xdouble:
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fdiv
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmul
+ %div = fdiv <2 x double> %a, %D
+ %div1 = fdiv <2 x double> %b, %D
+ %div2 = fdiv <2 x double> %c, %D
+ tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2)
+ ret void
+}
+
+; Following test cases check we never combine two FDIVs if neither of them
+; calculates a reciprocal.
+define void @two_fdiv_float(float %D, float %a, float %b) #0 {
+; CHECK-LABEL: two_fdiv_float:
+; CHECK: fdiv
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fmul
+ %div = fdiv float %a, %D
+ %div1 = fdiv float %b, %D
+ tail call void @foo_2f(float %div, float %div1)
+ ret void
+}
+
+define void @two_fdiv_double(double %D, double %a, double %b) #0 {
+; CHECK-LABEL: two_fdiv_double:
+; CHECK: fdiv
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fmul
+ %div = fdiv double %a, %D
+ %div1 = fdiv double %b, %D
+ tail call void @foo_2d(double %div, double %div1)
+ ret void
+}
+
+declare void @foo_3f(float, float, float)
+declare void @foo_3d(double, double, double)
+declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>)
+declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
+declare void @foo_2f(float, float)
+declare void @foo_2d(double, double)
+
+attributes #0 = { "unsafe-fp-math"="true" }
More information about the llvm-commits
mailing list