[llvm] r235344 - Refactoring and enhancement to FMA combine.
Olivier Sallenave
ohsallen at us.ibm.com
Mon Apr 20 13:29:40 PDT 2015
Author: ohsallen
Date: Mon Apr 20 15:29:40 2015
New Revision: 235344
URL: http://llvm.org/viewvc/llvm-project?rev=235344&view=rev
Log:
Refactoring and enhancement to FMA combine.
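This moves the FADD -> FMA and FSUB -> FMA combines out of the free
functions performFaddFmulCombines and performFsubFmulCombines into the
DAGCombiner member functions visitFADDForFMACombine and
visitFSUBForFMACombine, which now choose between ISD::FMA and ISD::FMAD
themselves, and it teaches the aggressive (nested) combines to look
through free FP_EXTEND nodes.

As a rough sketch of the baseline fold these routines perform (an
illustration, not code from the patch), an add of a one-use multiply is
contracted into a single fused operation when fast FP contraction or
unsafe FP math is in effect:

  define double @contract(double %a, double %b, double %c) {
    %mul = fmul double %a, %b
    ; Rewritten by the combiner as (fma %a, %b, %c), avoiding the
    ; intermediate rounding of the multiply.
    %add = fadd double %mul, %c
    ret double %add
  }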
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/PowerPC/fma-assoc.ll
llvm/trunk/test/CodeGen/PowerPC/fma-ext.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=235344&r1=235343&r2=235344&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Apr 20 15:29:40 2015
@@ -309,6 +309,9 @@ namespace {
SDValue visitMSTORE(SDNode *N);
SDValue visitFP_TO_FP16(SDNode *N);
+ SDValue visitFADDForFMACombine(SDNode *N);
+ SDValue visitFSUBForFMACombine(SDNode *N);
+
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
@@ -7062,20 +7065,44 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
}
-// Attempt different variants of (fadd (fmul a, b), c) -> fma or fmad
-static SDValue performFaddFmulCombines(unsigned FusedOpcode,
- bool Aggressive,
- SDNode *N,
- const TargetLowering &TLI,
- SelectionDAG &DAG) {
+/// Try to perform FMA combining on a given FADD node.
+SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
+ SDLoc SL(N);
+
+ const TargetOptions &Options = DAG.getTarget().Options;
+ bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath);
+
+ // Floating-point multiply-add with intermediate rounding.
+ bool HasFMAD = (LegalOperations &&
+ TLI.isOperationLegal(ISD::FMAD, VT));
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA = ((!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ UnsafeFPMath);
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+ bool LookThroughFPExt = TLI.isFPExtFree(VT);
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (N0.getOpcode() == ISD::FMUL &&
(Aggressive || N0->hasOneUse())) {
- return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1), N1);
}
@@ -7083,53 +7110,180 @@ static SDValue performFaddFmulCombines(u
// Note: Commutes FADD operands.
if (N1.getOpcode() == ISD::FMUL &&
(Aggressive || N1->hasOneUse())) {
- return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(0), N1.getOperand(1), N0);
}
+ // Look through FP_EXTEND nodes to do more combining.
+ if (UnsafeFPMath && LookThroughFPExt) {
+ // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)), N1);
+ }
+
+ // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(1)), N0);
+ }
+ }
+
// More folding opportunities when target permits.
- if (Aggressive) {
+ if ((UnsafeFPMath || HasFMAD) && Aggressive) {
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
- if (N0.getOpcode() == ISD::FMA &&
+ if (N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL) {
- return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
N1));
}
// fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
- if (N1->getOpcode() == ISD::FMA &&
+ if (N1->getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL) {
- return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(0), N1.getOperand(1),
- DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(2).getOperand(0),
N1.getOperand(2).getOperand(1),
N0));
}
+
+ if (LookThroughFPExt) {
+ // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ auto FoldFAddFMAFPExtFMul = [&] (
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+ Z));
+ };
+ if (N0.getOpcode() == PreferredFusedOpcode) {
+ SDValue N02 = N0.getOperand(2);
+ if (N02.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N020 = N02.getOperand(0);
+ if (N020.getOpcode() == ISD::FMUL)
+ return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
+ N020.getOperand(0), N020.getOperand(1),
+ N1);
+ }
+ }
+
+ // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ auto FoldFAddFPExtFMAFMul = [&] (
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+ Z));
+ };
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == PreferredFusedOpcode) {
+ SDValue N002 = N00.getOperand(2);
+ if (N002.getOpcode() == ISD::FMUL)
+ return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
+ N002.getOperand(0), N002.getOperand(1),
+ N1);
+ }
+ }
+
+ // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
+ // -> (fma y, z, (fma (fpext u), (fpext v), x))
+ if (N1.getOpcode() == PreferredFusedOpcode) {
+ SDValue N12 = N1.getOperand(2);
+ if (N12.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N120 = N12.getOperand(0);
+ if (N120.getOpcode() == ISD::FMUL)
+ return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
+ N120.getOperand(0), N120.getOperand(1),
+ N0);
+ }
+ }
+
+ // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
+ // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == PreferredFusedOpcode) {
+ SDValue N102 = N10.getOperand(2);
+ if (N102.getOpcode() == ISD::FMUL)
+ return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
+ N102.getOperand(0), N102.getOperand(1),
+ N0);
+ }
+ }
+ }
}
return SDValue();
}
-static SDValue performFsubFmulCombines(unsigned FusedOpcode,
- bool Aggressive,
- SDNode *N,
- const TargetLowering &TLI,
- SelectionDAG &DAG) {
+/// Try to perform FMA combining on a given FSUB node.
+SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc SL(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
+ bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath);
+
+ // Floating-point multiply-add with intermediate rounding.
+ bool HasFMAD = (LegalOperations &&
+ TLI.isOperationLegal(ISD::FMAD, VT));
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA = ((!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ UnsafeFPMath);
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+ bool LookThroughFPExt = TLI.isFPExtFree(VT);
+
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
if (N0.getOpcode() == ISD::FMUL &&
(Aggressive || N0->hasOneUse())) {
- return DAG.getNode(FusedOpcode, SL, VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(ISD::FNEG, SL, VT, N1));
}
@@ -7138,7 +7292,7 @@ static SDValue performFsubFmulCombines(u
// Note: Commutes FSUB operands.
if (N1.getOpcode() == ISD::FMUL &&
(Aggressive || N1->hasOneUse()))
- return DAG.getNode(FusedOpcode, SL, VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
N1.getOperand(0)),
N1.getOperand(1), N0);
@@ -7149,41 +7303,214 @@ static SDValue performFsubFmulCombines(u
(Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
SDValue N00 = N0.getOperand(0).getOperand(0);
SDValue N01 = N0.getOperand(0).getOperand(1);
- return DAG.getNode(FusedOpcode, SL, VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
DAG.getNode(ISD::FNEG, SL, VT, N1));
}
+ // Look through FP_EXTEND nodes to do more combining.
+ if (UnsafeFPMath && LookThroughFPExt) {
+ // fold (fsub (fpext (fmul x, y)), z)
+ // -> (fma (fpext x), (fpext y), (fneg z))
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
+ }
+
+ // fold (fsub x, (fpext (fmul y, z)))
+ // -> (fma (fneg (fpext y)), (fpext z), x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(0))),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(1)),
+ N0);
+ }
+
+ // fold (fsub (fpext (fneg (fmul x, y))), z)
+ // -> (fneg (fma (fpext x), (fpext y), z))
+ // Note: This could be removed with appropriate canonicalization of the
+ // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
+ // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
+ // us from implementing the canonicalization in visitFSUB.
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FNEG) {
+ SDValue N000 = N00.getOperand(0);
+ if (N000.getOpcode() == ISD::FMUL) {
+ return DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(1)),
+ N1));
+ }
+ }
+ }
+
+ // fold (fsub (fneg (fpext (fmul x, y))), z)
+ // -> (fneg (fma (fpext x), (fpext y), z))
+ // Note: This could be removed with appropriate canonicalization of the
+ // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
+ // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
+ // us from implementing the canonicalization in visitFSUB.
+ if (N0.getOpcode() == ISD::FNEG) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N000 = N00.getOperand(0);
+ if (N000.getOpcode() == ISD::FMUL) {
+ return DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(1)),
+ N1));
+ }
+ }
+ }
+
+ }
+
// More folding opportunities when target permits.
- if (Aggressive) {
+ if ((UnsafeFPMath || HasFMAD) && Aggressive) {
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y, (fma u, v, (fneg z)))
- if (N0.getOpcode() == FusedOpcode &&
+ if (N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL) {
- return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
N1)));
}
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
- if (N1.getOpcode() == FusedOpcode &&
+ if (N1.getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
- return DAG.getNode(FusedOpcode, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
N1.getOperand(0)),
N1.getOperand(1),
- DAG.getNode(FusedOpcode, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N20),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N20),
N21, N0));
}
+
+ if (LookThroughFPExt) {
+ // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
+ if (N0.getOpcode() == PreferredFusedOpcode) {
+ SDValue N02 = N0.getOperand(2);
+ if (N02.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N020 = N02.getOperand(0);
+ if (N020.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N020.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N020.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1)));
+ }
+ }
+
+ // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y),
+ // (fma (fpext u), (fpext v), (fneg z)))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == PreferredFusedOpcode) {
+ SDValue N002 = N00.getOperand(2);
+ if (N002.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N002.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N002.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1)));
+ }
+ }
+
+ // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
+ // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
+ if (N1.getOpcode() == PreferredFusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
+ SDValue N120 = N1.getOperand(2).getOperand(0);
+ if (N120.getOpcode() == ISD::FMUL) {
+ SDValue N1200 = N120.getOperand(0);
+ SDValue N1201 = N120.getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
+ N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL,
+ VT, N1200)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N1201),
+ N0));
+ }
+ }
+
+ // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
+ // -> (fma (fneg (fpext y)), (fpext z),
+ // (fma (fneg (fpext u)), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N1.getOpcode() == ISD::FP_EXTEND &&
+ N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ SDValue N102 = N1.getOperand(0).getOperand(2);
+ if (N102.getOpcode() == ISD::FMUL) {
+ SDValue N1020 = N102.getOperand(0);
+ SDValue N1021 = N102.getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N100)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL,
+ VT, N1020)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N1021),
+ N0));
+ }
+ }
+ }
}
return SDValue();
@@ -7327,55 +7654,11 @@ SDValue DAGCombiner::visitFADD(SDNode *N
}
} // enable-unsafe-fp-math
- if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
- // Assume if there is an fmad instruction that it should be aggressively
- // used.
- if (SDValue Fused = performFaddFmulCombines(ISD::FMAD, true, N, TLI, DAG))
- return Fused;
- }
-
// FADD -> FMA combines:
- if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
- (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
-
- if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
- // Don't form FMA if we are preferring FMAD.
- if (SDValue Fused
- = performFaddFmulCombines(ISD::FMA,
- TLI.enableAggressiveFMAFusion(VT),
- N, TLI, DAG)) {
- return Fused;
- }
- }
-
- // When FP_EXTEND nodes are free on the target, and there is an opportunity
- // to combine into FMA, arrange such nodes accordingly.
- if (TLI.isFPExtFree(VT)) {
-
- // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N00.getOperand(1)), N1);
- }
-
- // fold (fadd x, (fpext (fmul y, z)), z) -> (fma (fpext y), (fpext z), x)
- // Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
- SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N10.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N10.getOperand(1)), N0);
- }
- }
+ SDValue Fused = visitFADDForFMACombine(N);
+ if (Fused) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
}
return SDValue();
@@ -7436,96 +7719,11 @@ SDValue DAGCombiner::visitFSUB(SDNode *N
}
}
- if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
- // Assume if there is an fmad instruction that it should be aggressively
- // used.
- if (SDValue Fused = performFsubFmulCombines(ISD::FMAD, true, N, TLI, DAG))
- return Fused;
- }
-
// FSUB -> FMA combines:
- if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
- (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
-
- if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
- // Don't form FMA if we are preferring FMAD.
-
- if (SDValue Fused
- = performFsubFmulCombines(ISD::FMA,
- TLI.enableAggressiveFMAFusion(VT),
- N, TLI, DAG)) {
- return Fused;
- }
- }
-
- // When FP_EXTEND nodes are free on the target, and there is an opportunity
- // to combine into FMA, arrange such nodes accordingly.
- if (TLI.isFPExtFree(VT)) {
- // fold (fsub (fpext (fmul x, y)), z)
- // -> (fma (fpext x), (fpext y), (fneg z))
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N00.getOperand(1)),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));
- }
-
- // fold (fsub x, (fpext (fmul y, z)))
- // -> (fma (fneg (fpext y)), (fpext z), x)
- // Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
- SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
- VT, N10.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N10.getOperand(1)),
- N0);
- }
-
- // fold (fsub (fpext (fneg (fmul, x, y))), z)
- // -> (fma (fneg (fpext x)), (fpext y), (fneg z))
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FNEG) {
- SDValue N000 = N00.getOperand(0);
- if (N000.getOpcode() == ISD::FMUL) {
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
- VT, N000.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N000.getOperand(1)),
- DAG.getNode(ISD::FNEG, dl, VT, N1));
- }
- }
- }
-
- // fold (fsub (fneg (fpext (fmul, x, y))), z)
- // -> (fma (fneg (fpext x)), (fpext y), (fneg z))
- if (N0.getOpcode() == ISD::FNEG) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FP_EXTEND) {
- SDValue N000 = N00.getOperand(0);
- if (N000.getOpcode() == ISD::FMUL) {
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
- VT, N000.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N000.getOperand(1)),
- DAG.getNode(ISD::FNEG, dl, VT, N1));
- }
- }
- }
- }
+ SDValue Fused = visitFSUBForFMACombine(N);
+ if (Fused) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
}
return SDValue();
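For reference, the new FP_EXTEND look-through combines match shapes like
the following (an illustrative example in the spirit of the tests below,
not code from the patch). When the target reports FP_EXTEND as free via
TLI.isFPExtFree, the single-precision multiply is widened and fused
directly into the double-precision add:

  define double @ext_contract(float %x, float %y, double %z) {
    %mul = fmul float %x, %y
    %ext = fpext float %mul to double
    ; Combines as (fma (fpext %x), (fpext %y), %z) instead of a
    ; separate fmul, fpext and fadd.
    %add = fadd double %ext, %z
    ret double %add
  }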
Modified: llvm/trunk/test/CodeGen/PowerPC/fma-assoc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/fma-assoc.ll?rev=235344&r1=235343&r2=235344&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/fma-assoc.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/fma-assoc.ll Mon Apr 20 15:29:40 2015
@@ -3,11 +3,11 @@
define double @test_FMADD_ASSOC1(double %A, double %B, double %C,
double %D, double %E) {
- %F = fmul double %A, %B ; <double> [#uses=1]
- %G = fmul double %C, %D ; <double> [#uses=1]
- %H = fadd double %F, %G ; <double> [#uses=1]
- %I = fadd double %H, %E ; <double> [#uses=1]
- ret double %I
+ %F = fmul double %A, %B ; <double> [#uses=1]
+ %G = fmul double %C, %D ; <double> [#uses=1]
+ %H = fadd double %F, %G ; <double> [#uses=1]
+ %I = fadd double %H, %E ; <double> [#uses=1]
+ ret double %I
; CHECK-LABEL: test_FMADD_ASSOC1:
; CHECK: fmadd
; CHECK-NEXT: fmadd
@@ -22,11 +22,11 @@ define double @test_FMADD_ASSOC1(double
define double @test_FMADD_ASSOC2(double %A, double %B, double %C,
double %D, double %E) {
- %F = fmul double %A, %B ; <double> [#uses=1]
- %G = fmul double %C, %D ; <double> [#uses=1]
- %H = fadd double %F, %G ; <double> [#uses=1]
- %I = fadd double %E, %H ; <double> [#uses=1]
- ret double %I
+ %F = fmul double %A, %B ; <double> [#uses=1]
+ %G = fmul double %C, %D ; <double> [#uses=1]
+ %H = fadd double %F, %G ; <double> [#uses=1]
+ %I = fadd double %E, %H ; <double> [#uses=1]
+ ret double %I
; CHECK-LABEL: test_FMADD_ASSOC2:
; CHECK: fmadd
; CHECK-NEXT: fmadd
@@ -41,11 +41,11 @@ define double @test_FMADD_ASSOC2(double
define double @test_FMSUB_ASSOC1(double %A, double %B, double %C,
double %D, double %E) {
- %F = fmul double %A, %B ; <double> [#uses=1]
- %G = fmul double %C, %D ; <double> [#uses=1]
- %H = fadd double %F, %G ; <double> [#uses=1]
- %I = fsub double %H, %E ; <double> [#uses=1]
- ret double %I
+ %F = fmul double %A, %B ; <double> [#uses=1]
+ %G = fmul double %C, %D ; <double> [#uses=1]
+ %H = fadd double %F, %G ; <double> [#uses=1]
+ %I = fsub double %H, %E ; <double> [#uses=1]
+ ret double %I
; CHECK-LABEL: test_FMSUB_ASSOC1:
; CHECK: fmsub
; CHECK-NEXT: fmadd
@@ -60,11 +60,11 @@ define double @test_FMSUB_ASSOC1(double
define double @test_FMSUB_ASSOC2(double %A, double %B, double %C,
double %D, double %E) {
- %F = fmul double %A, %B ; <double> [#uses=1]
- %G = fmul double %C, %D ; <double> [#uses=1]
- %H = fadd double %F, %G ; <double> [#uses=1]
- %I = fsub double %E, %H ; <double> [#uses=1]
- ret double %I
+ %F = fmul double %A, %B ; <double> [#uses=1]
+ %G = fmul double %C, %D ; <double> [#uses=1]
+ %H = fadd double %F, %G ; <double> [#uses=1]
+ %I = fsub double %E, %H ; <double> [#uses=1]
+ ret double %I
; CHECK-LABEL: test_FMSUB_ASSOC2:
; CHECK: fnmsub
; CHECK-NEXT: fnmsub
@@ -77,3 +77,159 @@ define double @test_FMSUB_ASSOC2(double
; CHECK-VSX-NEXT: blr
}
+define double @test_FMADD_ASSOC_EXT1(float %A, float %B, double %C,
+ double %D, double %E) {
+ %F = fmul float %A, %B ; <float> [#uses=1]
+ %G = fpext float %F to double ; <double> [#uses=1]
+ %H = fmul double %C, %D ; <double> [#uses=1]
+ %I = fadd double %H, %G ; <double> [#uses=1]
+ %J = fadd double %I, %E ; <double> [#uses=1]
+ ret double %J
+; CHECK-LABEL: test_FMADD_ASSOC_EXT1:
+; CHECK: fmadd
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT1:
+; CHECK-VSX: xsmaddmdp
+; CHECK-VSX-NEXT: xsmaddadp
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMADD_ASSOC_EXT2(float %A, float %B, float %C,
+ float %D, double %E) {
+ %F = fmul float %A, %B ; <float> [#uses=1]
+ %G = fmul float %C, %D ; <float> [#uses=1]
+ %H = fadd float %F, %G ; <float> [#uses=1]
+ %I = fpext float %H to double ; <double> [#uses=1]
+ %J = fadd double %I, %E ; <double> [#uses=1]
+ ret double %J
+; CHECK-LABEL: test_FMADD_ASSOC_EXT2:
+; CHECK: fmadd
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT2:
+; CHECK-VSX: xsmaddmdp
+; CHECK-VSX-NEXT: xsmaddadp
+; CHECK-VSX-NEXT: fmr
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMADD_ASSOC_EXT3(float %A, float %B, double %C,
+ double %D, double %E) {
+ %F = fmul float %A, %B ; <float> [#uses=1]
+ %G = fpext float %F to double ; <double> [#uses=1]
+ %H = fmul double %C, %D ; <double> [#uses=1]
+ %I = fadd double %H, %G ; <double> [#uses=1]
+ %J = fadd double %E, %I ; <double> [#uses=1]
+ ret double %J
+; CHECK-LABEL: test_FMADD_ASSOC_EXT3:
+; CHECK: fmadd
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT3:
+; CHECK-VSX: xsmaddmdp
+; CHECK-VSX-NEXT: xsmaddadp
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMADD_ASSOC_EXT4(float %A, float %B, float %C,
+ float %D, double %E) {
+ %F = fmul float %A, %B ; <float> [#uses=1]
+ %G = fmul float %C, %D ; <float> [#uses=1]
+ %H = fadd float %F, %G ; <float> [#uses=1]
+ %I = fpext float %H to double ; <double> [#uses=1]
+ %J = fadd double %E, %I ; <double> [#uses=1]
+ ret double %J
+; CHECK-LABEL: test_FMADD_ASSOC_EXT4:
+; CHECK: fmadd
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT4:
+; CHECK-VSX: xsmaddmdp
+; CHECK-VSX-NEXT: xsmaddadp
+; CHECK-VSX-NEXT: fmr
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMSUB_ASSOC_EXT1(float %A, float %B, double %C,
+ double %D, double %E) {
+ %F = fmul float %A, %B ; <float> [#uses=1]
+ %G = fpext float %F to double ; <double> [#uses=1]
+ %H = fmul double %C, %D ; <double> [#uses=1]
+ %I = fadd double %H, %G ; <double> [#uses=1]
+ %J = fsub double %I, %E ; <double> [#uses=1]
+ ret double %J
+; CHECK-LABEL: test_FMSUB_ASSOC_EXT1:
+; CHECK: fmsub
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT1:
+; CHECK-VSX: xsmsubmdp
+; CHECK-VSX-NEXT: xsmaddadp
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMSUB_ASSOC_EXT2(float %A, float %B, float %C,
+ float %D, double %E) {
+ %F = fmul float %A, %B ; <float> [#uses=1]
+ %G = fmul float %C, %D ; <float> [#uses=1]
+ %H = fadd float %F, %G ; <float> [#uses=1]
+ %I = fpext float %H to double ; <double> [#uses=1]
+ %J = fsub double %I, %E ; <double> [#uses=1]
+ ret double %J
+; CHECK-LABEL: test_FMSUB_ASSOC_EXT2:
+; CHECK: fmsub
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT2:
+; CHECK-VSX: xsmsubmdp
+; CHECK-VSX-NEXT: xsmaddadp
+; CHECK-VSX-NEXT: fmr
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMSUB_ASSOC_EXT3(float %A, float %B, double %C,
+ double %D, double %E) {
+ %F = fmul float %A, %B ; <float> [#uses=1]
+ %G = fpext float %F to double ; <double> [#uses=1]
+ %H = fmul double %C, %D ; <double> [#uses=1]
+ %I = fadd double %H, %G ; <double> [#uses=1]
+ %J = fsub double %E, %I ; <double> [#uses=1]
+ ret double %J
+; CHECK-LABEL: test_FMSUB_ASSOC_EXT3:
+; CHECK: fnmsub
+; CHECK-NEXT: fnmsub
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT3:
+; CHECK-VSX: xsnmsubmdp
+; CHECK-VSX-NEXT: xsnmsubadp
+; CHECK-VSX-NEXT: fmr
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMSUB_ASSOC_EXT4(float %A, float %B, float %C,
+ float %D, double %E) {
+ %F = fmul float %A, %B ; <float> [#uses=1]
+ %G = fmul float %C, %D ; <float> [#uses=1]
+ %H = fadd float %F, %G ; <float> [#uses=1]
+ %I = fpext float %H to double ; <double> [#uses=1]
+ %J = fsub double %E, %I ; <double> [#uses=1]
+ ret double %J
+; CHECK-LABEL: test_FMSUB_ASSOC_EXT4:
+; CHECK: fnmsub
+; CHECK-NEXT: fnmsub
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT4:
+; CHECK-VSX: xsnmsubmdp
+; CHECK-VSX-NEXT: xsnmsubadp
+; CHECK-VSX-NEXT: fmr
+; CHECK-VSX-NEXT: blr
+}
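The ASSOC_EXT tests above exercise aggressive fusion together with the
FP_EXTEND look-through. For instance, test_FMADD_ASSOC_EXT2 should
(roughly, depending on the order the combiner visits nodes) reach the
fold

  ; (fadd (fpext (fma A, B, (fmul C, D))), E)
  ;   -> (fma (fpext A), (fpext B), (fma (fpext C), (fpext D), E))

after its inner float fadd has first been fused, which is why the CHECK
lines expect two back-to-back fmadd (or xsmaddmdp/xsmaddadp) instructions
and no separate multiply or add.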
Modified: llvm/trunk/test/CodeGen/PowerPC/fma-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/fma-ext.ll?rev=235344&r1=235343&r2=235344&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/fma-ext.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/fma-ext.ll Mon Apr 20 15:29:40 2015
@@ -60,34 +60,34 @@ define double @test_FMSUB_EXT2(float %A,
define double @test_FMSUB_EXT3(float %A, float %B, double %C) {
%D = fmul float %A, %B ; <float> [#uses=1]
- %E = fsub float -0.000000e+00, %D ; <float> [#uses=1]
+ %E = fsub float -0.000000e+00, %D ; <float> [#uses=1]
%F = fpext float %E to double ; <double> [#uses=1]
%G = fsub double %F, %C ; <double> [#uses=1]
ret double %G
; CHECK-LABEL: test_FMSUB_EXT3:
-; CHECK: fneg
-; CHECK-NEXT: fmsub
+; CHECK: fnmadd
+
; CHECK-NEXT: blr
; CHECK-VSX-LABEL: test_FMSUB_EXT3:
-; CHECK-VSX: xsnegdp
-; CHECK-VSX-NEXT: xsmsubmdp
+; CHECK-VSX: xsnmaddmdp
+
; CHECK-VSX-NEXT: blr
}
define double @test_FMSUB_EXT4(float %A, float %B, double %C) {
%D = fmul float %A, %B ; <float> [#uses=1]
%E = fpext float %D to double ; <double> [#uses=1]
- %F = fsub double -0.000000e+00, %E ; <double> [#uses=1]
+ %F = fsub double -0.000000e+00, %E ; <double> [#uses=1]
%G = fsub double %F, %C ; <double> [#uses=1]
ret double %G
; CHECK-LABEL: test_FMSUB_EXT4:
-; CHECK: fneg
-; CHECK-NEXT: fmsub
+; CHECK: fnmadd
+
; CHECK-NEXT: blr
; CHECK-VSX-LABEL: test_FMSUB_EXT4:
-; CHECK-VSX: xsnegdp
-; CHECK-VSX-NEXT: xsmsubmdp
+; CHECK-VSX: xsnmaddmdp
+
; CHECK-VSX-NEXT: blr
-}
\ No newline at end of file
+}
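The updated CHECK lines in test_FMSUB_EXT3 and test_FMSUB_EXT4 follow
from the new fneg-aware folds: rather than negating an operand and
emitting a separate fneg plus fmsub, the combiner now produces a negated
fused node, roughly

  ; (fsub (fpext (fneg (fmul x, y))), z)
  ;   -> (fneg (fma (fpext x), (fpext y), z))

and the negated FMA selects to a single fnmadd (xsnmaddmdp under VSX)
during PowerPC instruction selection.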