[llvm] r225795 - Added TLI hook for isFPExtFree. Some of the FMA combine heuristics are now guarded with that hook.
Olivier Sallenave
ohsallen at us.ibm.com
Tue Jan 13 07:06:36 PST 2015
Author: ohsallen
Date: Tue Jan 13 09:06:36 2015
New Revision: 225795
URL: http://llvm.org/viewvc/llvm-project?rev=225795&view=rev
Log:
Added TLI hook for isFPExtFree. Some of the FMA combine heuristics are now guarded with that hook.
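The folds that this hook now guards rewrite a widened product feeding an fadd or fsub into an FMA whose operands are extended individually, which only pays off when the extra fpext nodes cost nothing. A minimal C++ sketch of the kind of source pattern affected, assuming fast/unsafe FP math and a target that reports fpext as free (the function name is made up):

    // Illustrative only: with fast-math and free fpext, the combiner can turn
    // (fadd (fpext (fmul x, y)), z) into (fma (fpext x), (fpext y), z).
    double widen_mul_add(float x, float y, double z) {
      return static_cast<double>(x * y) + z;
    }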
Modified:
llvm/trunk/include/llvm/Target/TargetLowering.h
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=225795&r1=225794&r2=225795&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/Target/TargetLowering.h Tue Jan 13 09:06:36 2015
@@ -1500,6 +1500,14 @@ public:
return isZExtFree(Val.getValueType(), VT2);
}
+ /// Return true if an fpext operation is free (for instance, because
+ /// single-precision floating-point numbers are implicitly extended to
+ /// double-precision).
+ virtual bool isFPExtFree(EVT VT) const {
+ assert(VT.isFloatingPoint());
+ return false;
+ }
+
/// Return true if an fneg operation is free to the point where it is never
/// worthwhile to replace it with a bitwise operation.
virtual bool isFNegFree(EVT VT) const {
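A backend opts in by overriding the new hook. As a hypothetical sketch (the target name and type choice are illustrative, not from this commit), a target could report fpext as free only for a particular wide type; note that the DAGCombiner changes below query the hook with the fadd/fsub result type, i.e. the extended-to type:

    // Hypothetical override in some target's ISelLowering; the assert mirrors
    // the contract of the default implementation above.
    bool MyTargetLowering::isFPExtFree(EVT VT) const {
      assert(VT.isFloatingPoint() && "isFPExtFree expects a floating-point type");
      return VT == MVT::f64;  // e.g. extensions up to f64 are free on this target
    }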
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=225795&r1=225794&r2=225795&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Jan 13 09:06:36 2015
@@ -6957,32 +6957,11 @@ SDValue DAGCombiner::visitFADD(SDNode *N
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
N1.getOperand(0), N1.getOperand(1), N0);
- // More folding opportunities when target permits.
- if (TLI.enableAggressiveFMAFusion(VT)) {
- // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
- if (N0.getOpcode() == ISD::FMA &&
- N0.getOperand(2).getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(2).getOperand(0),
- N0.getOperand(2).getOperand(1),
- N1));
-
- // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
- if (N1->getOpcode() == ISD::FMA &&
- N1.getOperand(2).getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N1.getOperand(0), N1.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N1.getOperand(2).getOperand(0),
- N1.getOperand(2).getOperand(1),
- N0));
+ // When FP_EXTEND nodes are free on the target, and there is an opportunity
+ // to combine into FMA, arrange such nodes accordingly.
+ if (TLI.isFPExtFree(VT)) {
- // Remove FP_EXTEND when there is an opportunity to combine. This is
- // legal here since extra precision is allowed.
-
- // fold (fadd (fpext (fmul x, y)), z) -> (fma x, y, z)
+ // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FMUL)
@@ -6993,7 +6972,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N
N00.getOperand(1)), N1);
}
- // fold (fadd x, (fpext (fmul y, z)), z) -> (fma y, z, x)
+ // fold (fadd x, (fpext (fmul y, z)), z) -> (fma (fpext y), (fpext z), x)
// Note: Commutes FADD operands.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
@@ -7005,6 +6984,30 @@ SDValue DAGCombiner::visitFADD(SDNode *N
N10.getOperand(1)), N0);
}
}
+
+ // More folding opportunities when target permits.
+ if (TLI.enableAggressiveFMAFusion(VT)) {
+
+ // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
+ if (N0.getOpcode() == ISD::FMA &&
+ N0.getOperand(2).getOpcode() == ISD::FMUL)
+ return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ N0.getOperand(2).getOperand(0),
+ N0.getOperand(2).getOperand(1),
+ N1));
+
+ // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
+ if (N1->getOpcode() == ISD::FMA &&
+ N1.getOperand(2).getOpcode() == ISD::FMUL)
+ return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ N1.getOperand(0), N1.getOperand(1),
+ DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ N1.getOperand(2).getOperand(0),
+ N1.getOperand(2).getOperand(1),
+ N0));
+ }
}
return SDValue();
@@ -7099,41 +7102,12 @@ SDValue DAGCombiner::visitFSUB(SDNode *N
DAG.getNode(ISD::FNEG, dl, VT, N1));
}
- // More folding opportunities when target permits.
- if (TLI.enableAggressiveFMAFusion(VT)) {
+ // When FP_EXTEND nodes are free on the target, and there is an opportunity
+ // to combine into FMA, arrange such nodes accordingly.
+ if (TLI.isFPExtFree(VT)) {
- // fold (fsub (fma x, y, (fmul u, v)), z)
- // -> (fma x, y (fma u, v, (fneg z)))
- if (N0.getOpcode() == ISD::FMA &&
- N0.getOperand(2).getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(2).getOperand(0),
- N0.getOperand(2).getOperand(1),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N1)));
-
- // fold (fsub x, (fma y, z, (fmul u, v)))
- // -> (fma (fneg y), z, (fma (fneg u), v, x))
- if (N1.getOpcode() == ISD::FMA &&
- N1.getOperand(2).getOpcode() == ISD::FMUL) {
- SDValue N20 = N1.getOperand(2).getOperand(0);
- SDValue N21 = N1.getOperand(2).getOperand(1);
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N1.getOperand(0)),
- N1.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N20),
- N21, N0));
- }
-
- // Remove FP_EXTEND when there is an opportunity to combine. This is
- // legal here since extra precision is allowed.
-
- // fold (fsub (fpext (fmul x, y)), z) -> (fma x, y, (fneg z))
+ // fold (fsub (fpext (fmul x, y)), z)
+ // -> (fma (fpext x), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FMUL)
@@ -7145,7 +7119,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N
DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));
}
- // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg y), z, x)
+ // fold (fsub x, (fpext (fmul y, z)))
+ // -> (fma (fneg (fpext y)), (fpext z), x)
// Note: Commutes FSUB operands.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
@@ -7160,7 +7135,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N
}
// fold (fsub (fpext (fneg (fmul, x, y))), z)
- // -> (fma (fneg x), y, (fneg z))
+ // -> (fma (fneg (fpext x)), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FNEG) {
@@ -7178,7 +7153,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N
}
// fold (fsub (fneg (fpext (fmul, x, y))), z)
- // -> (fma (fneg x), y, (fneg z))
+ // -> (fma (fneg (fpext x)), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FNEG) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FP_EXTEND) {
@@ -7195,6 +7170,38 @@ SDValue DAGCombiner::visitFSUB(SDNode *N
}
}
}
+
+ // More folding opportunities when target permits.
+ if (TLI.enableAggressiveFMAFusion(VT)) {
+
+ // fold (fsub (fma x, y, (fmul u, v)), z)
+ // -> (fma x, y (fma u, v, (fneg z)))
+ if (N0.getOpcode() == ISD::FMA &&
+ N0.getOperand(2).getOpcode() == ISD::FMUL)
+ return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ N0.getOperand(2).getOperand(0),
+ N0.getOperand(2).getOperand(1),
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ N1)));
+
+ // fold (fsub x, (fma y, z, (fmul u, v)))
+ // -> (fma (fneg y), z, (fma (fneg u), v, x))
+ if (N1.getOpcode() == ISD::FMA &&
+ N1.getOperand(2).getOpcode() == ISD::FMUL) {
+ SDValue N20 = N1.getOperand(2).getOperand(0);
+ SDValue N21 = N1.getOperand(2).getOperand(1);
+ return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ N1.getOperand(0)),
+ N1.getOperand(1),
+ DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ N20),
+ N21, N0));
+ }
+ }
}
return SDValue();
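Taken together, the guarded regions in visitFADD and visitFSUB now split into two families: the fpext-based folds (gated on isFPExtFree) and the nested-FMA folds (still gated on enableAggressiveFMAFusion). A rough C++ sketch of source patterns each family can target, assuming fast/unsafe FP math and FMA formation enabled (function names are made up):

    // fpext family (needs isFPExtFree):
    //   (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
    double widen_mul_sub(float x, float y, double z) {
      return static_cast<double>(x * y) - z;
    }

    // aggressive-fusion family (needs enableAggressiveFMAFusion):
    //   (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
    double nested_fma(double x, double y, double u, double v, double z) {
      return (x * y + u * v) + z;
    }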
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=225795&r1=225794&r2=225795&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Tue Jan 13 09:06:36 2015
@@ -9817,6 +9817,11 @@ bool PPCTargetLowering::isZExtFree(SDVal
return TargetLowering::isZExtFree(Val, VT2);
}
+bool PPCTargetLowering::isFPExtFree(EVT VT) const {
+ assert(VT.isFloatingPoint());
+ return true;
+}
+
bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
return isInt<16>(Imm) || isUInt<16>(Imm);
}
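PowerPC can return true unconditionally here because single-precision values are kept in double-precision format in the floating-point registers, so the extension is effectively a register-to-register no-op. A target that also wants the nested-FMA folds (moved after the fpext folds above) would pair this with the existing enableAggressiveFMAFusion hook; a hedged sketch for a hypothetical target:

    // Illustrative only: opting a hypothetical target into the nested-FMA
    // folds that DAGCombiner guards with this existing hook.
    bool MyTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
      return VT.isFloatingPoint();
    }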
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=225795&r1=225794&r2=225795&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Tue Jan 13 09:06:36 2015
@@ -528,6 +528,8 @@ namespace llvm {
bool isZExtFree(SDValue Val, EVT VT2) const override;
+ bool isFPExtFree(EVT VT) const override;
+
/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,