<div dir="ltr">Thanks for the clarification, Hal. I will fix this.<div><br></div><div>Olivier</div></div><div class="gmail_extra"><br><div class="gmail_quote">2015-04-10 11:16 GMT-04:00 Hal Finkel <span dir="ltr"><<a href="mailto:hfinkel@anl.gov" target="_blank">hfinkel@anl.gov</a>></span>:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span class="">----- Original Message -----<br>
> From: "Olivier Sallenave" <<a href="mailto:ol.sall@gmail.com">ol.sall@gmail.com</a>><br>
> To: "Rafael Espíndola" <<a href="mailto:rafael.espindola@gmail.com">rafael.espindola@gmail.com</a>><br>
> Cc: <a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br>
> Sent: Friday, April 10, 2015 10:02:06 AM<br>
> Subject: Re: [llvm] r234513 - Refactoring and enhancement to FMA combine.<br>
><br>
><br>
><br>
> Sorry about that. I am not sure I understand, though: amdgcn seems like an<br>
> out-of-tree target, so why are its regression tests in the tree in the<br>
> first place?<br>
<br>
</span>No, the test that is failing is test/CodeGen/R600/mad-combine.ll -- amdgcn is Triple::amdgcn, which is handled by the R600 backend.<br>
<br>
-Hal<br>
<div class="HOEnZb"><div class="h5"><br>
><br>
><br>
> Thanks,<br>
><br>
><br>
> Olivier<br>
><br>
><br>
> 2015-04-09 14:33 GMT-04:00 Rafael Espíndola <<br>
> <a href="mailto:rafael.espindola@gmail.com">rafael.espindola@gmail.com</a> > :<br>
><br>
><br>
> Reverted since this was failing on the bots:<br>
><br>
> <a href="http://lab.llvm.org:8011/builders/clang-x86_64-debian-fast/builds/25489" target="_blank">http://lab.llvm.org:8011/builders/clang-x86_64-debian-fast/builds/25489</a><br>
><br>
><br>
><br>
> On 9 April 2015 at 13:55, Olivier Sallenave < <a href="mailto:ohsallen@us.ibm.com">ohsallen@us.ibm.com</a> ><br>
> wrote:<br>
> > Author: ohsallen<br>
> > Date: Thu Apr 9 12:55:26 2015<br>
> > New Revision: 234513<br>
> ><br>
> > URL: <a href="http://llvm.org/viewvc/llvm-project?rev=234513&view=rev" target="_blank">http://llvm.org/viewvc/llvm-project?rev=234513&view=rev</a><br>
> > Log:<br>
> > Refactoring and enhancement to FMA combine.<br>
> ><br>
> > Modified:<br>
> > llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp<br>
> > llvm/trunk/test/CodeGen/PowerPC/fma-assoc.ll<br>
> > llvm/trunk/test/CodeGen/PowerPC/fma-ext.ll<br>
> ><br>
> > Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp<br>
> > URL:<br>
> > <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=234513&r1=234512&r2=234513&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=234513&r1=234512&r2=234513&view=diff</a><br>
> > ==============================================================================<br>
> > --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)<br>
> > +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Apr 9<br>
> > 12:55:26 2015<br>
> > @@ -308,6 +308,9 @@ namespace {<br>
> > SDValue visitMLOAD(SDNode *N);<br>
> > SDValue visitMSTORE(SDNode *N);<br>
> ><br>
> > + SDValue visitFADDForFMACombine(SDNode *N);<br>
> > + SDValue visitFSUBForFMACombine(SDNode *N);<br>
> > +<br>
> > SDValue XformToShuffleWithZero(SDNode *N);<br>
> > SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue<br>
> > RHS);<br>
> ><br>
> > @@ -7057,20 +7060,40 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode<br>
> > return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);<br>
> > }<br>
> ><br>
> > -// Attempt different variants of (fadd (fmul a, b), c) -> fma or<br>
> > fmad<br>
> > -static SDValue performFaddFmulCombines(unsigned FusedOpcode,<br>
> > - bool Aggressive,<br>
> > - SDNode *N,<br>
> > - const TargetLowering &TLI,<br>
> > - SelectionDAG &DAG) {<br>
> > +/// Try to perform FMA combining on a given FADD node.<br>
> > +SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {<br>
> > SDValue N0 = N->getOperand(0);<br>
> > SDValue N1 = N->getOperand(1);<br>
> > EVT VT = N->getValueType(0);<br>
> > + SDLoc SL(N);<br>
> > +<br>
> > + const TargetOptions &Options = DAG.getTarget().Options;<br>
> > + bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast<br>
> > ||<br>
> > + Options.UnsafeFPMath);<br>
> > +<br>
> > + // Floating-point multiply-add with intermediate rounding.<br>
> > + bool HasFMAD = (LegalOperations &&<br>
> > + TLI.isOperationLegal(ISD::FMAD, VT));<br>
> > +<br>
> > + // Floating-point multiply-add without intermediate rounding.<br>
> > + bool HasFMA = ((!LegalOperations ||<br>
> > + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&<br>
> > + TLI.isFMAFasterThanFMulAndFAdd(VT) &&<br>
> > + UnsafeFPMath);<br>
> > +<br>
> > + // No valid opcode, do not combine.<br>
> > + if (!HasFMAD && !HasFMA)<br>
> > + return SDValue();<br>
> > +<br>
> > + // Always prefer FMAD to FMA for precision.<br>
> > + unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD :<br>
> > ISD::FMA;<br>
> > + bool Aggressive = TLI.enableAggressiveFMAFusion(VT);<br>
> > + bool LookThroughFPExt = TLI.isFPExtFree(VT);<br>
> ><br>
> > // fold (fadd (fmul x, y), z) -> (fma x, y, z)<br>
> > if (N0.getOpcode() == ISD::FMUL &&<br>
> > (Aggressive || N0->hasOneUse())) {<br>
> > - return DAG.getNode(FusedOpcode, SDLoc(N), VT,<br>
> > + return DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > N0.getOperand(0), N0.getOperand(1), N1);<br>
> > }<br>
> ><br>
> > @@ -7078,53 +7101,176 @@ static SDValue performFaddFmulCombines(u<br>
> > // Note: Commutes FADD operands.<br>
> > if (N1.getOpcode() == ISD::FMUL &&<br>
> > (Aggressive || N1->hasOneUse())) {<br>
> > - return DAG.getNode(FusedOpcode, SDLoc(N), VT,<br>
> > + return DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > N1.getOperand(0), N1.getOperand(1), N0);<br>
> > }<br>
> ><br>
> > + // Look through FP_EXTEND nodes to do more combining.<br>
> > + if (UnsafeFPMath && LookThroughFPExt) {<br>
> > + // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext<br>
> > y), z)<br>
> > + if (N0.getOpcode() == ISD::FP_EXTEND) {<br>
> > + SDValue N00 = N0.getOperand(0);<br>
> > + if (N00.getOpcode() == ISD::FMUL)<br>
> > + return DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT,<br>
> > + N00.getOperand(0)),<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT,<br>
> > + N00.getOperand(1)), N1);<br>
> > + }<br>
> > +<br>
> > + // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext<br>
> > z), x)<br>
> > + // Note: Commutes FADD operands.<br>
> > + if (N1.getOpcode() == ISD::FP_EXTEND) {<br>
> > + SDValue N10 = N1.getOperand(0);<br>
> > + if (N10.getOpcode() == ISD::FMUL)<br>
> > + return DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT,<br>
> > + N10.getOperand(0)),<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT,<br>
> > + N10.getOperand(1)), N0);<br>
> > + }<br>
> > + }<br>
> > +<br>
> > // More folding opportunities when target permits.<br>
> > - if (Aggressive) {<br>
> > + if (UnsafeFPMath && Aggressive) {<br>
> > // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v,<br>
> > z))<br>
> > - if (N0.getOpcode() == ISD::FMA &&<br>
> > + if (N0.getOpcode() == PreferredFusedOpcode &&<br>
> > N0.getOperand(2).getOpcode() == ISD::FMUL) {<br>
> > - return DAG.getNode(FusedOpcode, SDLoc(N), VT,<br>
> > + return DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > N0.getOperand(0), N0.getOperand(1),<br>
> > - DAG.getNode(FusedOpcode, SDLoc(N), VT,<br>
> > + DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > N0.getOperand(2).getOperand(0),<br>
> > N0.getOperand(2).getOperand(1),<br>
> > N1));<br>
> > }<br>
> ><br>
> > // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v,<br>
> > x))<br>
> > - if (N1->getOpcode() == ISD::FMA &&<br>
> > + if (N1->getOpcode() == PreferredFusedOpcode &&<br>
> > N1.getOperand(2).getOpcode() == ISD::FMUL) {<br>
> > - return DAG.getNode(FusedOpcode, SDLoc(N), VT,<br>
> > + return DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > N1.getOperand(0), N1.getOperand(1),<br>
> > - DAG.getNode(FusedOpcode, SDLoc(N), VT,<br>
> > + DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > N1.getOperand(2).getOperand(0),<br>
> > N1.getOperand(2).getOperand(1),<br>
> > N0));<br>
> > }<br>
> > +<br>
> > + if (LookThroughFPExt) {<br>
> > + // fold (fadd (fma x, y, (fpext (fmul u, v))), z)<br>
> > + // -> (fma x, y, (fma (fpext u), (fpext v), z))<br>
> > + auto FoldFAddFMAFPExtFMul = [&] (<br>
> > + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {<br>
> > + return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,<br>
> > + DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT, U),<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT, V),<br>
> > + Z));<br>
> > + };<br>
> > + if (N0.getOpcode() == PreferredFusedOpcode) {<br>
> > + SDValue N02 = N0.getOperand(2);<br>
> > + if (N02.getOpcode() == ISD::FP_EXTEND) {<br>
> > + SDValue N020 = N02.getOperand(0);<br>
> > + if (N020.getOpcode() == ISD::FMUL)<br>
> > + return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),<br>
> > + N020.getOperand(0), N020.getOperand(1),<br>
> > + N1);<br>
> > + }<br>
> > + }<br>
> > +<br>
> > + // fold (fadd (fpext (fma x, y, (fmul u, v))), z)<br>
> > + // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))<br>
> > + // FIXME: This turns two single-precision and one<br>
> > double-precision<br>
> > + // operation into two double-precision operations, which might<br>
> > not be<br>
> > + // interesting for all targets, especially GPUs.<br>
> > + auto FoldFAddFPExtFMAFMul = [&] (<br>
> > + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {<br>
> > + return DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT, X),<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),<br>
> > + DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT, U),<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT, V),<br>
> > + Z));<br>
> > + };<br>
> > + if (N0.getOpcode() == ISD::FP_EXTEND) {<br>
> > + SDValue N00 = N0.getOperand(0);<br>
> > + if (N00.getOpcode() == PreferredFusedOpcode) {<br>
> > + SDValue N002 = N00.getOperand(2);<br>
> > + if (N002.getOpcode() == ISD::FMUL)<br>
> > + return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),<br>
> > + N002.getOperand(0), N002.getOperand(1),<br>
> > + N1);<br>
> > + }<br>
> > + }<br>
> > +<br>
> > + // fold (fadd x, (fma y, z, (fpext (fmul u, v)))<br>
> > + // -> (fma y, z, (fma (fpext u), (fpext v), x))<br>
> > + if (N1.getOpcode() == PreferredFusedOpcode) {<br>
> > + SDValue N12 = N1.getOperand(2);<br>
> > + if (N12.getOpcode() == ISD::FP_EXTEND) {<br>
> > + SDValue N120 = N12.getOperand(0);<br>
> > + if (N120.getOpcode() == ISD::FMUL)<br>
> > + return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),<br>
> > + N120.getOperand(0), N120.getOperand(1),<br>
> > + N0);<br>
> > + }<br>
> > + }<br>
> > +<br>
> > + // fold (fadd x, (fpext (fma y, z, (fmul u, v)))<br>
> > + // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))<br>
> > + // FIXME: This turns two single-precision and one<br>
> > double-precision<br>
> > + // operation into two double-precision operations, which might<br>
> > not be<br>
> > + // interesting for all targets, especially GPUs.<br>
> > + if (N1.getOpcode() == ISD::FP_EXTEND) {<br>
> > + SDValue N10 = N1.getOperand(0);<br>
> > + if (N10.getOpcode() == PreferredFusedOpcode) {<br>
> > + SDValue N102 = N10.getOperand(2);<br>
> > + if (N102.getOpcode() == ISD::FMUL)<br>
> > + return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),<br>
> > + N102.getOperand(0), N102.getOperand(1),<br>
> > + N0);<br>
> > + }<br>
> > + }<br>
> > + }<br>
> > }<br>
> ><br>
> > return SDValue();<br>
> > }<br>
> ><br>
> > -static SDValue performFsubFmulCombines(unsigned FusedOpcode,<br>
> > - bool Aggressive,<br>
> > - SDNode *N,<br>
> > - const TargetLowering &TLI,<br>
> > - SelectionDAG &DAG) {<br>
> > +/// Try to perform FMA combining on a given FSUB node.<br>
> > +SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {<br>
> > SDValue N0 = N->getOperand(0);<br>
> > SDValue N1 = N->getOperand(1);<br>
> > EVT VT = N->getValueType(0);<br>
> > -<br>
> > SDLoc SL(N);<br>
> ><br>
> > + const TargetOptions &Options = DAG.getTarget().Options;<br>
> > + bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast<br>
> > ||<br>
> > + Options.UnsafeFPMath);<br>
> > +<br>
> > + // Floating-point multiply-add with intermediate rounding.<br>
> > + bool HasFMAD = (LegalOperations &&<br>
> > + TLI.isOperationLegal(ISD::FMAD, VT));<br>
> > +<br>
> > + // Floating-point multiply-add without intermediate rounding.<br>
> > + bool HasFMA = ((!LegalOperations ||<br>
> > + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&<br>
> > + TLI.isFMAFasterThanFMulAndFAdd(VT) &&<br>
> > + UnsafeFPMath);<br>
> > +<br>
> > + // No valid opcode, do not combine.<br>
> > + if (!HasFMAD && !HasFMA)<br>
> > + return SDValue();<br>
> > +<br>
> > + // Always prefer FMAD to FMA for precision.<br>
> > + unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD :<br>
> > ISD::FMA;<br>
> > + bool Aggressive = TLI.enableAggressiveFMAFusion(VT);<br>
> > + bool LookThroughFPExt = TLI.isFPExtFree(VT);<br>
> > +<br>
> > // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))<br>
> > if (N0.getOpcode() == ISD::FMUL &&<br>
> > (Aggressive || N0->hasOneUse())) {<br>
> > - return DAG.getNode(FusedOpcode, SL, VT,<br>
> > + return DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > N0.getOperand(0), N0.getOperand(1),<br>
> > DAG.getNode(ISD::FNEG, SL, VT, N1));<br>
> > }<br>
> > @@ -7133,7 +7279,7 @@ static SDValue performFsubFmulCombines(u<br>
> > // Note: Commutes FSUB operands.<br>
> > if (N1.getOpcode() == ISD::FMUL &&<br>
> > (Aggressive || N1->hasOneUse()))<br>
> > - return DAG.getNode(FusedOpcode, SL, VT,<br>
> > + return DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > DAG.getNode(ISD::FNEG, SL, VT,<br>
> > N1.getOperand(0)),<br>
> > N1.getOperand(1), N0);<br>
> > @@ -7144,41 +7290,213 @@ static SDValue performFsubFmulCombines(u<br>
> > (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse())))<br>
> > {<br>
> > SDValue N00 = N0.getOperand(0).getOperand(0);<br>
> > SDValue N01 = N0.getOperand(0).getOperand(1);<br>
> > - return DAG.getNode(FusedOpcode, SL, VT,<br>
> > + return DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > DAG.getNode(ISD::FNEG, SL, VT, N00), N01,<br>
> > DAG.getNode(ISD::FNEG, SL, VT, N1));<br>
> > }<br>
> ><br>
> > + // Look through FP_EXTEND nodes to do more combining.<br>
> > + if (UnsafeFPMath && LookThroughFPExt) {<br>
> > + // fold (fsub (fpext (fmul x, y)), z)<br>
> > + // -> (fma (fpext x), (fpext y), (fneg z))<br>
> > + if (N0.getOpcode() == ISD::FP_EXTEND) {<br>
> > + SDValue N00 = N0.getOperand(0);<br>
> > + if (N00.getOpcode() == ISD::FMUL)<br>
> > + return DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT,<br>
> > + N00.getOperand(0)),<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT,<br>
> > + N00.getOperand(1)),<br>
> > + DAG.getNode(ISD::FNEG, SL, VT, N1));<br>
> > + }<br>
> > +<br>
> > + // fold (fsub x, (fpext (fmul y, z)))<br>
> > + // -> (fma (fneg (fpext y)), (fpext z), x)<br>
> > + // Note: Commutes FSUB operands.<br>
> > + if (N1.getOpcode() == ISD::FP_EXTEND) {<br>
> > + SDValue N10 = N1.getOperand(0);<br>
> > + if (N10.getOpcode() == ISD::FMUL)<br>
> > + return DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > + DAG.getNode(ISD::FNEG, SL, VT,<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT,<br>
> > + N10.getOperand(0))),<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT,<br>
> > + N10.getOperand(1)),<br>
> > + N0);<br>
> > + }<br>
> > +<br>
> > + // fold (fsub (fpext (fneg (fmul, x, y))), z)<br>
> > + // -> (fneg (fma (fpext x), (fpext y), z))<br>
> > + // Note: This could be removed with appropriate canonicalization<br>
> > of the<br>
> > + // input expression into (fneg (fadd (fpext (fmul, x, y)), z).<br>
> > However, the<br>
> > + // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math<br>
> > prevent<br>
> > + // from implementing the canonicalization in visitFSUB.<br>
> > + if (N0.getOpcode() == ISD::FP_EXTEND) {<br>
> > + SDValue N00 = N0.getOperand(0);<br>
> > + if (N00.getOpcode() == ISD::FNEG) {<br>
> > + SDValue N000 = N00.getOperand(0);<br>
> > + if (N000.getOpcode() == ISD::FMUL) {<br>
> > + return DAG.getNode(ISD::FNEG, SL, VT,<br>
> > + DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT,<br>
> > + N000.getOperand(0)),<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT,<br>
> > + N000.getOperand(1)),<br>
> > + N1));<br>
> > + }<br>
> > + }<br>
> > + }<br>
> > +<br>
> > + // fold (fsub (fneg (fpext (fmul x, y))), z)<br>
> > + // -> (fneg (fma (fpext x), (fpext y), z))<br>
> > + // Note: This could be removed with appropriate canonicalization<br>
> > of the<br>
> > + // input expression into (fneg (fadd (fpext (fmul, x, y)), z).<br>
> > However, the<br>
> > + // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math<br>
> > prevent<br>
> > + // from implementing the canonicalization in visitFSUB.<br>
> > + if (N0.getOpcode() == ISD::FNEG) {<br>
> > + SDValue N00 = N0.getOperand(0);<br>
> > + if (N00.getOpcode() == ISD::FP_EXTEND) {<br>
> > + SDValue N000 = N00.getOperand(0);<br>
> > + if (N000.getOpcode() == ISD::FMUL) {<br>
> > + return DAG.getNode(ISD::FNEG, SL, VT,<br>
> > + DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT,<br>
> > + N000.getOperand(0)),<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT,<br>
> > + N000.getOperand(1)),<br>
> > + N1));<br>
> > + }<br>
> > + }<br>
> > + }<br>
> > +<br>
> > + }<br>
> > +<br>
> > // More folding opportunities when target permits.<br>
> > - if (Aggressive) {<br>
> > + if (UnsafeFPMath && Aggressive) {<br>
> > // fold (fsub (fma x, y, (fmul u, v)), z)<br>
> > // -> (fma x, y (fma u, v, (fneg z)))<br>
> > - if (N0.getOpcode() == FusedOpcode &&<br>
> > + if (N0.getOpcode() == PreferredFusedOpcode &&<br>
> > N0.getOperand(2).getOpcode() == ISD::FMUL) {<br>
> > - return DAG.getNode(FusedOpcode, SDLoc(N), VT,<br>
> > + return DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > N0.getOperand(0), N0.getOperand(1),<br>
> > - DAG.getNode(FusedOpcode, SDLoc(N), VT,<br>
> > + DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > N0.getOperand(2).getOperand(0),<br>
> > N0.getOperand(2).getOperand(1),<br>
> > - DAG.getNode(ISD::FNEG, SDLoc(N), VT,<br>
> > + DAG.getNode(ISD::FNEG, SL, VT,<br>
> > N1)));<br>
> > }<br>
> ><br>
> > // fold (fsub x, (fma y, z, (fmul u, v)))<br>
> > // -> (fma (fneg y), z, (fma (fneg u), v, x))<br>
> > - if (N1.getOpcode() == FusedOpcode &&<br>
> > + if (N1.getOpcode() == PreferredFusedOpcode &&<br>
> > N1.getOperand(2).getOpcode() == ISD::FMUL) {<br>
> > SDValue N20 = N1.getOperand(2).getOperand(0);<br>
> > SDValue N21 = N1.getOperand(2).getOperand(1);<br>
> > - return DAG.getNode(FusedOpcode, SDLoc(N), VT,<br>
> > - DAG.getNode(ISD::FNEG, SDLoc(N), VT,<br>
> > + return DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > + DAG.getNode(ISD::FNEG, SL, VT,<br>
> > N1.getOperand(0)),<br>
> > N1.getOperand(1),<br>
> > - DAG.getNode(FusedOpcode, SDLoc(N), VT,<br>
> > - DAG.getNode(ISD::FNEG, SDLoc(N), VT,<br>
> > - N20),<br>
> > + DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > + DAG.getNode(ISD::FNEG, SL, VT, N20),<br>
> > N21, N0));<br>
> > }<br>
> > +<br>
> > + if (LookThroughFPExt) {<br>
> > + // fold (fsub (fma x, y, (fpext (fmul u, v))), z)<br>
> > + // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))<br>
> > + if (N0.getOpcode() == PreferredFusedOpcode) {<br>
> > + SDValue N02 = N0.getOperand(2);<br>
> > + if (N02.getOpcode() == ISD::FP_EXTEND) {<br>
> > + SDValue N020 = N02.getOperand(0);<br>
> > + if (N020.getOpcode() == ISD::FMUL)<br>
> > + return DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > + N0.getOperand(0), N0.getOperand(1),<br>
> > + DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT,<br>
> > + N020.getOperand(0)),<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT,<br>
> > + N020.getOperand(1)),<br>
> > + DAG.getNode(ISD::FNEG, SL, VT,<br>
> > + N1)));<br>
> > + }<br>
> > + }<br>
> > +<br>
> > + // fold (fsub (fpext (fma x, y, (fmul u, v))), z)<br>
> > + // -> (fma (fpext x), (fpext y),<br>
> > + // (fma (fpext u), (fpext v), (fneg z)))<br>
> > + // FIXME: This turns two single-precision and one<br>
> > double-precision<br>
> > + // operation into two double-precision operations, which might<br>
> > not be<br>
> > + // interesting for all targets, especially GPUs.<br>
> > + if (N0.getOpcode() == ISD::FP_EXTEND) {<br>
> > + SDValue N00 = N0.getOperand(0);<br>
> > + if (N00.getOpcode() == PreferredFusedOpcode) {<br>
> > + SDValue N002 = N00.getOperand(2);<br>
> > + if (N002.getOpcode() == ISD::FMUL)<br>
> > + return DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT,<br>
> > + N00.getOperand(0)),<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT,<br>
> > + N00.getOperand(1)),<br>
> > + DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT,<br>
> > + N002.getOperand(0)),<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT,<br>
> > + N002.getOperand(1)),<br>
> > + DAG.getNode(ISD::FNEG, SL, VT,<br>
> > + N1)));<br>
> > + }<br>
> > + }<br>
> > +<br>
> > + // fold (fsub x, (fma y, z, (fpext (fmul u, v))))<br>
> > + // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))<br>
> > + if (N1.getOpcode() == PreferredFusedOpcode &&<br>
> > + N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {<br>
> > + SDValue N120 = N1.getOperand(2).getOperand(0);<br>
> > + if (N120.getOpcode() == ISD::FMUL) {<br>
> > + SDValue N1200 = N120.getOperand(0);<br>
> > + SDValue N1201 = N120.getOperand(1);<br>
> > + return DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > + DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),<br>
> > + N1.getOperand(1),<br>
> > + DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > + DAG.getNode(ISD::FNEG, SL, VT,<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL,<br>
> > + VT, N1200)),<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT,<br>
> > + N1201),<br>
> > + N0));<br>
> > + }<br>
> > + }<br>
> > +<br>
> > + // fold (fsub x, (fpext (fma y, z, (fmul u, v))))<br>
> > + // -> (fma (fneg (fpext y)), (fpext z),<br>
> > + // (fma (fneg (fpext u)), (fpext v), x))<br>
> > + // FIXME: This turns two single-precision and one<br>
> > double-precision<br>
> > + // operation into two double-precision operations, which might<br>
> > not be<br>
> > + // interesting for all targets, especially GPUs.<br>
> > + if (N1.getOpcode() == ISD::FP_EXTEND &&<br>
> > + N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {<br>
> > + SDValue N100 = N1.getOperand(0).getOperand(0);<br>
> > + SDValue N101 = N1.getOperand(0).getOperand(1);<br>
> > + SDValue N102 = N1.getOperand(0).getOperand(2);<br>
> > + if (N102.getOpcode() == ISD::FMUL) {<br>
> > + SDValue N1020 = N102.getOperand(0);<br>
> > + SDValue N1021 = N102.getOperand(1);<br>
> > + return DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > + DAG.getNode(ISD::FNEG, SL, VT,<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT,<br>
> > + N100)),<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),<br>
> > + DAG.getNode(PreferredFusedOpcode, SL, VT,<br>
> > + DAG.getNode(ISD::FNEG, SL, VT,<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL,<br>
> > + VT, N1020)),<br>
> > + DAG.getNode(ISD::FP_EXTEND, SL, VT,<br>
> > + N1021),<br>
> > + N0));<br>
> > + }<br>
> > + }<br>
> > + }<br>
> > }<br>
> ><br>
> > return SDValue();<br>
> > @@ -7322,55 +7640,11 @@ SDValue DAGCombiner::visitFADD(SDNode *N<br>
> > }<br>
> > } // enable-unsafe-fp-math<br>
> ><br>
> > - if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {<br>
> > - // Assume if there is an fmad instruction that it should be<br>
> > aggressively<br>
> > - // used.<br>
> > - if (SDValue Fused = performFaddFmulCombines(ISD::FMAD, true, N,<br>
> > TLI, DAG))<br>
> > - return Fused;<br>
> > - }<br>
> > -<br>
> > // FADD -> FMA combines:<br>
> > - if ((Options.AllowFPOpFusion == FPOpFusion::Fast ||<br>
> > Options.UnsafeFPMath) &&<br>
> > - TLI.isFMAFasterThanFMulAndFAdd(VT) &&<br>
> > - (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)))<br>
> > {<br>
> > -<br>
> > - if (!TLI.isOperationLegal(ISD::FMAD, VT)) {<br>
> > - // Don't form FMA if we are preferring FMAD.<br>
> > - if (SDValue Fused<br>
> > - = performFaddFmulCombines(ISD::FMA,<br>
> > - TLI.enableAggressiveFMAFusion(VT),<br>
> > - N, TLI, DAG)) {<br>
> > - return Fused;<br>
> > - }<br>
> > - }<br>
> > -<br>
> > - // When FP_EXTEND nodes are free on the target, and there is an<br>
> > opportunity<br>
> > - // to combine into FMA, arrange such nodes accordingly.<br>
> > - if (TLI.isFPExtFree(VT)) {<br>
> > -<br>
> > - // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext<br>
> > y), z)<br>
> > - if (N0.getOpcode() == ISD::FP_EXTEND) {<br>
> > - SDValue N00 = N0.getOperand(0);<br>
> > - if (N00.getOpcode() == ISD::FMUL)<br>
> > - return DAG.getNode(ISD::FMA, SDLoc(N), VT,<br>
> > - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,<br>
> > - N00.getOperand(0)),<br>
> > - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,<br>
> > - N00.getOperand(1)), N1);<br>
> > - }<br>
> > -<br>
> > - // fold (fadd x, (fpext (fmul y, z)), z) -> (fma (fpext y),<br>
> > (fpext z), x)<br>
> > - // Note: Commutes FADD operands.<br>
> > - if (N1.getOpcode() == ISD::FP_EXTEND) {<br>
> > - SDValue N10 = N1.getOperand(0);<br>
> > - if (N10.getOpcode() == ISD::FMUL)<br>
> > - return DAG.getNode(ISD::FMA, SDLoc(N), VT,<br>
> > - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,<br>
> > - N10.getOperand(0)),<br>
> > - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,<br>
> > - N10.getOperand(1)), N0);<br>
> > - }<br>
> > - }<br>
> > + SDValue Fused = visitFADDForFMACombine(N);<br>
> > + if (Fused) {<br>
> > + AddToWorklist(Fused.getNode());<br>
> > + return Fused;<br>
> > }<br>
> ><br>
> > return SDValue();<br>
> > @@ -7431,96 +7705,11 @@ SDValue DAGCombiner::visitFSUB(SDNode *N<br>
> > }<br>
> > }<br>
> ><br>
> > - if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {<br>
> > - // Assume if there is an fmad instruction that it should be<br>
> > aggressively<br>
> > - // used.<br>
> > - if (SDValue Fused = performFsubFmulCombines(ISD::FMAD, true, N,<br>
> > TLI, DAG))<br>
> > - return Fused;<br>
> > - }<br>
> > -<br>
> > // FSUB -> FMA combines:<br>
> > - if ((Options.AllowFPOpFusion == FPOpFusion::Fast ||<br>
> > Options.UnsafeFPMath) &&<br>
> > - TLI.isFMAFasterThanFMulAndFAdd(VT) &&<br>
> > - (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)))<br>
> > {<br>
> > -<br>
> > - if (!TLI.isOperationLegal(ISD::FMAD, VT)) {<br>
> > - // Don't form FMA if we are preferring FMAD.<br>
> > -<br>
> > - if (SDValue Fused<br>
> > - = performFsubFmulCombines(ISD::FMA,<br>
> > - TLI.enableAggressiveFMAFusion(VT),<br>
> > - N, TLI, DAG)) {<br>
> > - return Fused;<br>
> > - }<br>
> > - }<br>
> > -<br>
> > - // When FP_EXTEND nodes are free on the target, and there is an<br>
> > opportunity<br>
> > - // to combine into FMA, arrange such nodes accordingly.<br>
> > - if (TLI.isFPExtFree(VT)) {<br>
> > - // fold (fsub (fpext (fmul x, y)), z)<br>
> > - // -> (fma (fpext x), (fpext y), (fneg z))<br>
> > - if (N0.getOpcode() == ISD::FP_EXTEND) {<br>
> > - SDValue N00 = N0.getOperand(0);<br>
> > - if (N00.getOpcode() == ISD::FMUL)<br>
> > - return DAG.getNode(ISD::FMA, SDLoc(N), VT,<br>
> > - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,<br>
> > - N00.getOperand(0)),<br>
> > - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,<br>
> > - N00.getOperand(1)),<br>
> > - DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));<br>
> > - }<br>
> > -<br>
> > - // fold (fsub x, (fpext (fmul y, z)))<br>
> > - // -> (fma (fneg (fpext y)), (fpext z), x)<br>
> > - // Note: Commutes FSUB operands.<br>
> > - if (N1.getOpcode() == ISD::FP_EXTEND) {<br>
> > - SDValue N10 = N1.getOperand(0);<br>
> > - if (N10.getOpcode() == ISD::FMUL)<br>
> > - return DAG.getNode(ISD::FMA, SDLoc(N), VT,<br>
> > - DAG.getNode(ISD::FNEG, SDLoc(N), VT,<br>
> > - DAG.getNode(ISD::FP_EXTEND, SDLoc(N),<br>
> > - VT, N10.getOperand(0))),<br>
> > - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,<br>
> > - N10.getOperand(1)),<br>
> > - N0);<br>
> > - }<br>
> > -<br>
> > - // fold (fsub (fpext (fneg (fmul, x, y))), z)<br>
> > - // -> (fma (fneg (fpext x)), (fpext y), (fneg z))<br>
> > - if (N0.getOpcode() == ISD::FP_EXTEND) {<br>
> > - SDValue N00 = N0.getOperand(0);<br>
> > - if (N00.getOpcode() == ISD::FNEG) {<br>
> > - SDValue N000 = N00.getOperand(0);<br>
> > - if (N000.getOpcode() == ISD::FMUL) {<br>
> > - return DAG.getNode(ISD::FMA, dl, VT,<br>
> > - DAG.getNode(ISD::FNEG, dl, VT,<br>
> > - DAG.getNode(ISD::FP_EXTEND, SDLoc(N),<br>
> > - VT, N000.getOperand(0))),<br>
> > - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,<br>
> > - N000.getOperand(1)),<br>
> > - DAG.getNode(ISD::FNEG, dl, VT, N1));<br>
> > - }<br>
> > - }<br>
> > - }<br>
> > -<br>
> > - // fold (fsub (fneg (fpext (fmul, x, y))), z)<br>
> > - // -> (fma (fneg (fpext x)), (fpext y), (fneg z))<br>
> > - if (N0.getOpcode() == ISD::FNEG) {<br>
> > - SDValue N00 = N0.getOperand(0);<br>
> > - if (N00.getOpcode() == ISD::FP_EXTEND) {<br>
> > - SDValue N000 = N00.getOperand(0);<br>
> > - if (N000.getOpcode() == ISD::FMUL) {<br>
> > - return DAG.getNode(ISD::FMA, dl, VT,<br>
> > - DAG.getNode(ISD::FNEG, dl, VT,<br>
> > - DAG.getNode(ISD::FP_EXTEND, SDLoc(N),<br>
> > - VT, N000.getOperand(0))),<br>
> > - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,<br>
> > - N000.getOperand(1)),<br>
> > - DAG.getNode(ISD::FNEG, dl, VT, N1));<br>
> > - }<br>
> > - }<br>
> > - }<br>
> > - }<br>
> > + SDValue Fused = visitFSUBForFMACombine(N);<br>
> > + if (Fused) {<br>
> > + AddToWorklist(Fused.getNode());<br>
> > + return Fused;<br>
> > }<br>
> ><br>
> > return SDValue();<br>
> ><br>
> > Modified: llvm/trunk/test/CodeGen/PowerPC/fma-assoc.ll<br>
> > URL:<br>
> > <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/fma-assoc.ll?rev=234513&r1=234512&r2=234513&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/fma-assoc.ll?rev=234513&r1=234512&r2=234513&view=diff</a><br>
> > ==============================================================================<br>
> > --- llvm/trunk/test/CodeGen/PowerPC/fma-assoc.ll (original)<br>
> > +++ llvm/trunk/test/CodeGen/PowerPC/fma-assoc.ll Thu Apr 9 12:55:26<br>
> > 2015<br>
> > @@ -77,3 +77,159 @@ define double @test_FMSUB_ASSOC2(double<br>
> > ; CHECK-VSX-NEXT: blr<br>
> > }<br>
> ><br>
> > +define double @test_FMADD_ASSOC_EXT1(float %A, float %B, double<br>
> > %C,<br>
> > + double %D, double %E) {<br>
> > + %F = fmul float %A, %B ; <float> [#uses=1]<br>
> > + %G = fpext float %F to double ; <double> [#uses=1]<br>
> > + %H = fmul double %C, %D ; <double> [#uses=1]<br>
> > + %I = fadd double %H, %G ; <double> [#uses=1]<br>
> > + %J = fadd double %I, %E ; <double> [#uses=1]<br>
> > + ret double %J<br>
> > +; CHECK-LABEL: test_FMADD_ASSOC_EXT1:<br>
> > +; CHECK: fmadd<br>
> > +; CHECK-NEXT: fmadd<br>
> > +; CHECK-NEXT: blr<br>
> > +<br>
> > +; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT1:<br>
> > +; CHECK-VSX: xsmaddmdp<br>
> > +; CHECK-VSX-NEXT: xsmaddadp<br>
> > +; CHECK-VSX-NEXT: blr<br>
> > +}<br>
> > +<br>
> > +define double @test_FMADD_ASSOC_EXT2(float %A, float %B, float %C,<br>
> > + float %D, double %E) {<br>
> > + %F = fmul float %A, %B ; <float> [#uses=1]<br>
> > + %G = fmul float %C, %D ; <float> [#uses=1]<br>
> > + %H = fadd float %F, %G ; <float> [#uses=1]<br>
> > + %I = fpext float %H to double ; <double> [#uses=1]<br>
> > + %J = fadd double %I, %E ; <double> [#uses=1]<br>
> > + ret double %J<br>
> > +; CHECK-LABEL: test_FMADD_ASSOC_EXT2:<br>
> > +; CHECK: fmadd<br>
> > +; CHECK-NEXT: fmadd<br>
> > +; CHECK-NEXT: blr<br>
> > +<br>
> > +; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT2:<br>
> > +; CHECK-VSX: xsmaddmdp<br>
> > +; CHECK-VSX-NEXT: xsmaddadp<br>
> > +; CHECK-VSX-NEXT: fmr<br>
> > +; CHECK-VSX-NEXT: blr<br>
> > +}<br>
> > +<br>
> > +define double @test_FMADD_ASSOC_EXT3(float %A, float %B, double<br>
> > %C,<br>
> > + double %D, double %E) {<br>
> > + %F = fmul float %A, %B ; <float> [#uses=1]<br>
> > + %G = fpext float %F to double ; <double> [#uses=1]<br>
> > + %H = fmul double %C, %D ; <double> [#uses=1]<br>
> > + %I = fadd double %H, %G ; <double> [#uses=1]<br>
> > + %J = fadd double %E, %I ; <double> [#uses=1]<br>
> > + ret double %J<br>
> > +; CHECK-LABEL: test_FMADD_ASSOC_EXT3:<br>
> > +; CHECK: fmadd<br>
> > +; CHECK-NEXT: fmadd<br>
> > +; CHECK-NEXT: blr<br>
> > +<br>
> > +; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT3:<br>
> > +; CHECK-VSX: xsmaddmdp<br>
> > +; CHECK-VSX-NEXT: xsmaddadp<br>
> > +; CHECK-VSX-NEXT: blr<br>
> > +}<br>
> > +<br>
> > +define double @test_FMADD_ASSOC_EXT4(float %A, float %B, float %C,<br>
> > + float %D, double %E) {<br>
> > + %F = fmul float %A, %B ; <float> [#uses=1]<br>
> > + %G = fmul float %C, %D ; <float> [#uses=1]<br>
> > + %H = fadd float %F, %G ; <float> [#uses=1]<br>
> > + %I = fpext float %H to double ; <double> [#uses=1]<br>
> > + %J = fadd double %E, %I ; <double> [#uses=1]<br>
> > + ret double %J<br>
> > +; CHECK-LABEL: test_FMADD_ASSOC_EXT4:<br>
> > +; CHECK: fmadd<br>
> > +; CHECK-NEXT: fmadd<br>
> > +; CHECK-NEXT: blr<br>
> > +<br>
> > +; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT4:<br>
> > +; CHECK-VSX: xsmaddmdp<br>
> > +; CHECK-VSX-NEXT: xsmaddadp<br>
> > +; CHECK-VSX-NEXT: fmr<br>
> > +; CHECK-VSX-NEXT: blr<br>
> > +}<br>
> > +<br>
> > +define double @test_FMSUB_ASSOC_EXT1(float %A, float %B, double<br>
> > %C,<br>
> > + double %D, double %E) {<br>
> > + %F = fmul float %A, %B ; <float> [#uses=1]<br>
> > + %G = fpext float %F to double ; <double> [#uses=1]<br>
> > + %H = fmul double %C, %D ; <double> [#uses=1]<br>
> > + %I = fadd double %H, %G ; <double> [#uses=1]<br>
> > + %J = fsub double %I, %E ; <double> [#uses=1]<br>
> > + ret double %J<br>
> > +; CHECK-LABEL: test_FMSUB_ASSOC_EXT1:<br>
> > +; CHECK: fmsub<br>
> > +; CHECK-NEXT: fmadd<br>
> > +; CHECK-NEXT: blr<br>
> > +<br>
> > +; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT1:<br>
> > +; CHECK-VSX: xsmsubmdp<br>
> > +; CHECK-VSX-NEXT: xsmaddadp<br>
> > +; CHECK-VSX-NEXT: blr<br>
> > +}<br>
> > +<br>
> > +define double @test_FMSUB_ASSOC_EXT2(float %A, float %B, float %C,<br>
> > + float %D, double %E) {<br>
> > + %F = fmul float %A, %B ; <float> [#uses=1]<br>
> > + %G = fmul float %C, %D ; <float> [#uses=1]<br>
> > + %H = fadd float %F, %G ; <float> [#uses=1]<br>
> > + %I = fpext float %H to double ; <double> [#uses=1]<br>
> > + %J = fsub double %I, %E ; <double> [#uses=1]<br>
> > + ret double %J<br>
> > +; CHECK-LABEL: test_FMSUB_ASSOC_EXT2:<br>
> > +; CHECK: fmsub<br>
> > +; CHECK-NEXT: fmadd<br>
> > +; CHECK-NEXT: blr<br>
> > +<br>
> > +; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT2:<br>
> > +; CHECK-VSX: xsmsubmdp<br>
> > +; CHECK-VSX-NEXT: xsmaddadp<br>
> > +; CHECK-VSX-NEXT: fmr<br>
> > +; CHECK-VSX-NEXT: blr<br>
> > +}<br>
> > +<br>
> > +define double @test_FMSUB_ASSOC_EXT3(float %A, float %B, double<br>
> > %C,<br>
> > + double %D, double %E) {<br>
> > + %F = fmul float %A, %B ; <float> [#uses=1]<br>
> > + %G = fpext float %F to double ; <double> [#uses=1]<br>
> > + %H = fmul double %C, %D ; <double> [#uses=1]<br>
> > + %I = fadd double %H, %G ; <double> [#uses=1]<br>
> > + %J = fsub double %E, %I ; <double> [#uses=1]<br>
> > + ret double %J<br>
> > +; CHECK-LABEL: test_FMSUB_ASSOC_EXT3:<br>
> > +; CHECK: fnmsub<br>
> > +; CHECK-NEXT: fnmsub<br>
> > +; CHECK-NEXT: blr<br>
> > +<br>
> > +; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT3:<br>
> > +; CHECK-VSX: xsnmsubmdp<br>
> > +; CHECK-VSX-NEXT: xsnmsubadp<br>
> > +; CHECK-VSX-NEXT: fmr<br>
> > +; CHECK-VSX-NEXT: blr<br>
> > +}<br>
> > +<br>
> > +define double @test_FMSUB_ASSOC_EXT4(float %A, float %B, float %C,<br>
> > + float %D, double %E) {<br>
> > + %F = fmul float %A, %B ; <float> [#uses=1]<br>
> > + %G = fmul float %C, %D ; <float> [#uses=1]<br>
> > + %H = fadd float %F, %G ; <float> [#uses=1]<br>
> > + %I = fpext float %H to double ; <double> [#uses=1]<br>
> > + %J = fsub double %E, %I ; <double> [#uses=1]<br>
> > + ret double %J<br>
> > +; CHECK-LABEL: test_FMSUB_ASSOC_EXT4:<br>
> > +; CHECK: fnmsub<br>
> > +; CHECK-NEXT: fnmsub<br>
> > +; CHECK-NEXT: blr<br>
> > +<br>
> > +; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT4:<br>
> > +; CHECK-VSX: xsnmsubmdp<br>
> > +; CHECK-VSX-NEXT: xsnmsubadp<br>
> > +; CHECK-VSX-NEXT: fmr<br>
> > +; CHECK-VSX-NEXT: blr<br>
> > +}<br>
> > \ No newline at end of file<br>
> ><br>
> > Modified: llvm/trunk/test/CodeGen/PowerPC/fma-ext.ll<br>
> > URL:<br>
> > <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/fma-ext.ll?rev=234513&r1=234512&r2=234513&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/fma-ext.ll?rev=234513&r1=234512&r2=234513&view=diff</a><br>
> > ==============================================================================<br>
> > --- llvm/trunk/test/CodeGen/PowerPC/fma-ext.ll (original)<br>
> > +++ llvm/trunk/test/CodeGen/PowerPC/fma-ext.ll Thu Apr 9 12:55:26<br>
> > 2015<br>
> > @@ -65,13 +65,11 @@ define double @test_FMSUB_EXT3(float %A,<br>
> > %G = fsub double %F, %C ; <double> [#uses=1]<br>
> > ret double %G<br>
> > ; CHECK-LABEL: test_FMSUB_EXT3:<br>
> > -; CHECK: fneg<br>
> > -; CHECK-NEXT: fmsub<br>
> > +; CHECK: fnmadd<br>
> > ; CHECK-NEXT: blr<br>
> ><br>
> > ; CHECK-VSX-LABEL: test_FMSUB_EXT3:<br>
> > -; CHECK-VSX: xsnegdp<br>
> > -; CHECK-VSX-NEXT: xsmsubmdp<br>
> > +; CHECK-VSX: xsnmaddmdp<br>
> > ; CHECK-VSX-NEXT: blr<br>
> > }<br>
> ><br>
> > @@ -82,12 +80,10 @@ define double @test_FMSUB_EXT4(float %A,<br>
> > %G = fsub double %F, %C ; <double> [#uses=1]<br>
> > ret double %G<br>
> > ; CHECK-LABEL: test_FMSUB_EXT4:<br>
> > -; CHECK: fneg<br>
> > -; CHECK-NEXT: fmsub<br>
> > +; CHECK: fnmadd<br>
> > ; CHECK-NEXT: blr<br>
> ><br>
> > ; CHECK-VSX-LABEL: test_FMSUB_EXT4:<br>
> > -; CHECK-VSX: xsnegdp<br>
> > -; CHECK-VSX-NEXT: xsmsubmdp<br>
> > +; CHECK-VSX: xsnmaddmdp<br>
> > ; CHECK-VSX-NEXT: blr<br>
> > -}<br>
> > \ No newline at end of file<br>
> > +}<br>
> ><br>
> ><br>
> > _______________________________________________<br>
> > llvm-commits mailing list<br>
> > <a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br>
> > <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits</a><br>
> _______________________________________________<br>
> llvm-commits mailing list<br>
> <a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br>
> <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits</a><br>
><br>
><br>
> _______________________________________________<br>
> llvm-commits mailing list<br>
> <a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br>
> <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits</a><br>
><br>
<br>
</div></div><span class="HOEnZb"><font color="#888888">--<br>
Hal Finkel<br>
Assistant Computational Scientist<br>
Leadership Computing Facility<br>
Argonne National Laboratory<br>
</font></span></blockquote></div><br></div>