[llvm] r315740 - DAG: Add opcode and source type to isFPExtFree
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 13 12:55:45 PDT 2017
Author: arsenm
Date: Fri Oct 13 12:55:45 2017
New Revision: 315740
URL: http://llvm.org/viewvc/llvm-project?rev=315740&view=rev
Log:
DAG: Add opcode and source type to isFPExtFree
This is only currently used for mad/fma transforms.
This is the only case where it should be used for AMDGPU,
so add an opcode to be sure.
Modified:
llvm/trunk/include/llvm/Target/TargetLowering.h
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=315740&r1=315739&r2=315740&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/Target/TargetLowering.h Fri Oct 13 12:55:45 2017
@@ -1993,7 +1993,8 @@ public:
bool isExtFree(const Instruction *I) const {
switch (I->getOpcode()) {
case Instruction::FPExt:
- if (isFPExtFree(EVT::getEVT(I->getType())))
+ if (isFPExtFree(EVT::getEVT(I->getType()),
+ EVT::getEVT(I->getOperand(0)->getType())))
return true;
break;
case Instruction::ZExt:
@@ -2120,11 +2121,21 @@ public:
/// Return true if an fpext operation is free (for instance, because
/// single-precision floating-point numbers are implicitly extended to
/// double-precision).
- virtual bool isFPExtFree(EVT VT) const {
- assert(VT.isFloatingPoint());
+ virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const {
+ assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() &&
+ "invalid fpext types");
return false;
}
+ /// Return true if an fpext operation input to an \p Opcode operation is free
+ /// (for instance, because half-precision floating-point numbers are
+ /// implicitly extended to float-precision) for an FMA instruction.
+ virtual bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const {
+ assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
+ "invalid fpext types");
+ return isFPExtFree(DestVT, SrcVT);
+ }
+
/// Return true if folding a vector load into ExtVal (a sign, zero, or any
/// extend node) is profitable.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; }
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=315740&r1=315739&r2=315740&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Oct 13 12:55:45 2017
@@ -9095,7 +9095,6 @@ SDValue DAGCombiner::visitFADDForFMAComb
// Always prefer FMAD to FMA for precision.
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
- bool LookThroughFPExt = TLI.isFPExtFree(VT);
// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.
@@ -9125,28 +9124,31 @@ SDValue DAGCombiner::visitFADDForFMAComb
}
// Look through FP_EXTEND nodes to do more combining.
- if (LookThroughFPExt) {
- // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (isContractableFMUL(N00))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(1)), N1);
+
+ // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (isContractableFMUL(N00) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)), N1);
}
+ }
- // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
- // Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
- SDValue N10 = N1.getOperand(0);
- if (isContractableFMUL(N10))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(1)), N0);
+ // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (isContractableFMUL(N10) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(1)), N0);
}
}
@@ -9182,80 +9184,87 @@ SDValue DAGCombiner::visitFADDForFMAComb
N0));
}
- if (LookThroughFPExt) {
- // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
- // -> (fma x, y, (fma (fpext u), (fpext v), z))
- auto FoldFAddFMAFPExtFMul = [&] (
- SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
- Z));
- };
- if (N0.getOpcode() == PreferredFusedOpcode) {
- SDValue N02 = N0.getOperand(2);
- if (N02.getOpcode() == ISD::FP_EXTEND) {
- SDValue N020 = N02.getOperand(0);
- if (isContractableFMUL(N020))
- return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
- N020.getOperand(0), N020.getOperand(1),
- N1);
+
+ // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ auto FoldFAddFMAFPExtFMul = [&] (
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+ Z));
+ };
+ if (N0.getOpcode() == PreferredFusedOpcode) {
+ SDValue N02 = N0.getOperand(2);
+ if (N02.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N020 = N02.getOperand(0);
+ if (isContractableFMUL(N020) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
+ return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
+ N020.getOperand(0), N020.getOperand(1),
+ N1);
}
}
+ }
- // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
- // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
- // FIXME: This turns two single-precision and one double-precision
- // operation into two double-precision operations, which might not be
- // interesting for all targets, especially GPUs.
- auto FoldFAddFPExtFMAFMul = [&] (
- SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
- Z));
- };
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == PreferredFusedOpcode) {
- SDValue N002 = N00.getOperand(2);
- if (isContractableFMUL(N002))
- return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
- N002.getOperand(0), N002.getOperand(1),
- N1);
+ // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ auto FoldFAddFPExtFMAFMul = [&] (
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+ Z));
+ };
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == PreferredFusedOpcode) {
+ SDValue N002 = N00.getOperand(2);
+ if (isContractableFMUL(N002) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
+ N002.getOperand(0), N002.getOperand(1),
+ N1);
}
}
+ }
- // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
- // -> (fma y, z, (fma (fpext u), (fpext v), x))
- if (N1.getOpcode() == PreferredFusedOpcode) {
- SDValue N12 = N1.getOperand(2);
- if (N12.getOpcode() == ISD::FP_EXTEND) {
- SDValue N120 = N12.getOperand(0);
- if (isContractableFMUL(N120))
- return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
- N120.getOperand(0), N120.getOperand(1),
- N0);
+ // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
+ // -> (fma y, z, (fma (fpext u), (fpext v), x))
+ if (N1.getOpcode() == PreferredFusedOpcode) {
+ SDValue N12 = N1.getOperand(2);
+ if (N12.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N120 = N12.getOperand(0);
+ if (isContractableFMUL(N120) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
+ return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
+ N120.getOperand(0), N120.getOperand(1),
+ N0);
}
}
+ }
- // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
- // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
- // FIXME: This turns two single-precision and one double-precision
- // operation into two double-precision operations, which might not be
- // interesting for all targets, especially GPUs.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
- SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == PreferredFusedOpcode) {
- SDValue N102 = N10.getOperand(2);
- if (isContractableFMUL(N102))
- return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
- N102.getOperand(0), N102.getOperand(1),
- N0);
+ // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
+ // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == PreferredFusedOpcode) {
+ SDValue N102 = N10.getOperand(2);
+ if (isContractableFMUL(N102) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+ return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
+ N102.getOperand(0), N102.getOperand(1),
+ N0);
}
}
}
@@ -9297,7 +9306,6 @@ SDValue DAGCombiner::visitFSUBForFMAComb
// Always prefer FMAD to FMA for precision.
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
- bool LookThroughFPExt = TLI.isFPExtFree(VT);
// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.
@@ -9333,79 +9341,83 @@ SDValue DAGCombiner::visitFSUBForFMAComb
}
// Look through FP_EXTEND nodes to do more combining.
- if (LookThroughFPExt) {
- // fold (fsub (fpext (fmul x, y)), z)
- // -> (fma (fpext x), (fpext y), (fneg z))
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (isContractableFMUL(N00))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT, N1));
+
+ // fold (fsub (fpext (fmul x, y)), z)
+ // -> (fma (fpext x), (fpext y), (fneg z))
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (isContractableFMUL(N00) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
}
+ }
- // fold (fsub x, (fpext (fmul y, z)))
- // -> (fma (fneg (fpext y)), (fpext z), x)
- // Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
- SDValue N10 = N1.getOperand(0);
- if (isContractableFMUL(N10))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(1)),
- N0);
+ // fold (fsub x, (fpext (fmul y, z)))
+ // -> (fma (fneg (fpext y)), (fpext z), x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (isContractableFMUL(N10) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(0))),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(1)),
+ N0);
}
+ }
- // fold (fsub (fpext (fneg (fmul, x, y))), z)
- // -> (fneg (fma (fpext x), (fpext y), z))
- // Note: This could be removed with appropriate canonicalization of the
- // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
- // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
- // from implementing the canonicalization in visitFSUB.
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FNEG) {
- SDValue N000 = N00.getOperand(0);
- if (isContractableFMUL(N000)) {
- return DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(1)),
- N1));
- }
+ // fold (fsub (fpext (fneg (fmul, x, y))), z)
+ // -> (fneg (fma (fpext x), (fpext y), z))
+ // Note: This could be removed with appropriate canonicalization of the
+ // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
+ // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
+ // from implementing the canonicalization in visitFSUB.
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FNEG) {
+ SDValue N000 = N00.getOperand(0);
+ if (isContractableFMUL(N000) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ return DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(1)),
+ N1));
}
}
+ }
- // fold (fsub (fneg (fpext (fmul, x, y))), z)
- // -> (fneg (fma (fpext x)), (fpext y), z)
- // Note: This could be removed with appropriate canonicalization of the
- // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
- // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
- // from implementing the canonicalization in visitFSUB.
- if (N0.getOpcode() == ISD::FNEG) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FP_EXTEND) {
- SDValue N000 = N00.getOperand(0);
- if (isContractableFMUL(N000)) {
- return DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(1)),
- N1));
- }
+ // fold (fsub (fneg (fpext (fmul, x, y))), z)
+ // -> (fneg (fma (fpext x)), (fpext y), z)
+ // Note: This could be removed with appropriate canonicalization of the
+ // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
+ // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
+ // from implementing the canonicalization in visitFSUB.
+ if (N0.getOpcode() == ISD::FNEG) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N000 = N00.getOperand(0);
+ if (isContractableFMUL(N000) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
+ return DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(1)),
+ N1));
}
}
-
}
// More folding opportunities when target permits.
@@ -9444,102 +9456,108 @@ SDValue DAGCombiner::visitFSUBForFMAComb
N21, N0));
}
- if (LookThroughFPExt) {
- // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
- // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
- if (N0.getOpcode() == PreferredFusedOpcode) {
- SDValue N02 = N0.getOperand(2);
- if (N02.getOpcode() == ISD::FP_EXTEND) {
- SDValue N020 = N02.getOperand(0);
- if (isContractableFMUL(N020))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N020.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N020.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT,
- N1)));
- }
- }
-
- // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
- // -> (fma (fpext x), (fpext y),
- // (fma (fpext u), (fpext v), (fneg z)))
- // FIXME: This turns two single-precision and one double-precision
- // operation into two double-precision operations, which might not be
- // interesting for all targets, especially GPUs.
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == PreferredFusedOpcode) {
- SDValue N002 = N00.getOperand(2);
- if (isContractableFMUL(N002))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(1)),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N002.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N002.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT,
- N1)));
- }
- }
- // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
- // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
- if (N1.getOpcode() == PreferredFusedOpcode &&
- N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
- SDValue N120 = N1.getOperand(2).getOperand(0);
- if (isContractableFMUL(N120)) {
- SDValue N1200 = N120.getOperand(0);
- SDValue N1201 = N120.getOperand(1);
+ // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
+ if (N0.getOpcode() == PreferredFusedOpcode) {
+ SDValue N02 = N0.getOperand(2);
+ if (N02.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N020 = N02.getOperand(0);
+ if (isContractableFMUL(N020) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
- N1.getOperand(1),
+ N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL,
- VT, N1200)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N1201),
- N0));
+ N020.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N020.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1)));
}
}
+ }
- // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
- // -> (fma (fneg (fpext y)), (fpext z),
- // (fma (fneg (fpext u)), (fpext v), x))
- // FIXME: This turns two single-precision and one double-precision
- // operation into two double-precision operations, which might not be
- // interesting for all targets, especially GPUs.
- if (N1.getOpcode() == ISD::FP_EXTEND &&
- N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
- SDValue N100 = N1.getOperand(0).getOperand(0);
- SDValue N101 = N1.getOperand(0).getOperand(1);
- SDValue N102 = N1.getOperand(0).getOperand(2);
- if (isContractableFMUL(N102)) {
- SDValue N1020 = N102.getOperand(0);
- SDValue N1021 = N102.getOperand(1);
+ // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y),
+ // (fma (fpext u), (fpext v), (fneg z)))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == PreferredFusedOpcode) {
+ SDValue N002 = N00.getOperand(2);
+ if (isContractableFMUL(N002) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N100)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)),
DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL,
- VT, N1020)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N1021),
- N0));
+ N002.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N002.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1)));
}
}
}
+
+ // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
+ // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
+ if (N1.getOpcode() == PreferredFusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
+ SDValue N120 = N1.getOperand(2).getOperand(0);
+ if (isContractableFMUL(N120) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
+ SDValue N1200 = N120.getOperand(0);
+ SDValue N1201 = N120.getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
+ N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL,
+ VT, N1200)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N1201),
+ N0));
+ }
+ }
+
+ // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
+ // -> (fma (fneg (fpext y)), (fpext z),
+ // (fma (fneg (fpext u)), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N1.getOpcode() == ISD::FP_EXTEND &&
+ N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
+ SDValue CvtSrc = N1.getOperand(0);
+ SDValue N100 = CvtSrc.getOperand(0);
+ SDValue N101 = CvtSrc.getOperand(1);
+ SDValue N102 = CvtSrc.getOperand(2);
+ if (isContractableFMUL(N102) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
+ SDValue N1020 = N102.getOperand(0);
+ SDValue N1021 = N102.getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N100)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL,
+ VT, N1020)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N1021),
+ N0));
+ }
+ }
}
return SDValue();
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=315740&r1=315739&r2=315740&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Fri Oct 13 12:55:45 2017
@@ -13273,8 +13273,9 @@ bool PPCTargetLowering::isZExtFree(SDVal
return TargetLowering::isZExtFree(Val, VT2);
}
-bool PPCTargetLowering::isFPExtFree(EVT VT) const {
- assert(VT.isFloatingPoint());
+bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
+ assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
+ "invalid fpext types");
return true;
}
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=315740&r1=315739&r2=315740&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Fri Oct 13 12:55:45 2017
@@ -758,7 +758,7 @@ namespace llvm {
bool isZExtFree(SDValue Val, EVT VT2) const override;
- bool isFPExtFree(EVT VT) const override;
+ bool isFPExtFree(EVT DestVT, EVT SrcVT) const override;
/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
More information about the llvm-commits
mailing list