[llvm] r234513 - Refactoring and enhancement to FMA combine.

Rafael Espíndola rafael.espindola at gmail.com
Thu Apr 9 11:33:26 PDT 2015


Reverted since this was failing on the bots:

http://lab.llvm.org:8011/builders/clang-x86_64-debian-fast/builds/25489
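
For context, the reverted patch (r234513) teaches the DAG combiner to look
through FP_EXTEND when the target reports the extension as free
(TLI.isFPExtFree), so a pattern like the sketch below can fold into a single
double-precision fused multiply-add. This mirrors the fold comments and the
new fma-assoc.ll tests in the diff; the function and value names here are
illustrative only, not taken from the patch:

    ; fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
    define double @fadd_fpext_fmul(float %x, float %y, double %z) {
      %m = fmul float %x, %y
      %e = fpext float %m to double
      %r = fadd double %e, %z
      ret double %r
    }

Whether the fold fires also depends on -fp-contract=fast or
-enable-unsafe-fp-math and on the target's FMA/FMAD legality checks, per the
new visitFADDForFMACombine below.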

On 9 April 2015 at 13:55, Olivier Sallenave <ohsallen at us.ibm.com> wrote:
> Author: ohsallen
> Date: Thu Apr  9 12:55:26 2015
> New Revision: 234513
>
> URL: http://llvm.org/viewvc/llvm-project?rev=234513&view=rev
> Log:
> Refactoring and enhancement to FMA combine.
>
> Modified:
>     llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>     llvm/trunk/test/CodeGen/PowerPC/fma-assoc.ll
>     llvm/trunk/test/CodeGen/PowerPC/fma-ext.ll
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=234513&r1=234512&r2=234513&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Apr  9 12:55:26 2015
> @@ -308,6 +308,9 @@ namespace {
>      SDValue visitMLOAD(SDNode *N);
>      SDValue visitMSTORE(SDNode *N);
>
> +    SDValue visitFADDForFMACombine(SDNode *N);
> +    SDValue visitFSUBForFMACombine(SDNode *N);
> +
>      SDValue XformToShuffleWithZero(SDNode *N);
>      SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
>
> @@ -7057,20 +7060,40 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode
>    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
>  }
>
> -// Attempt different variants of (fadd (fmul a, b), c) -> fma or fmad
> -static SDValue performFaddFmulCombines(unsigned FusedOpcode,
> -                                       bool Aggressive,
> -                                       SDNode *N,
> -                                       const TargetLowering &TLI,
> -                                       SelectionDAG &DAG) {
> +/// Try to perform FMA combining on a given FADD node.
> +SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
>    SDValue N0 = N->getOperand(0);
>    SDValue N1 = N->getOperand(1);
>    EVT VT = N->getValueType(0);
> +  SDLoc SL(N);
> +
> +  const TargetOptions &Options = DAG.getTarget().Options;
> +  bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
> +                       Options.UnsafeFPMath);
> +
> +  // Floating-point multiply-add with intermediate rounding.
> +  bool HasFMAD = (LegalOperations &&
> +                  TLI.isOperationLegal(ISD::FMAD, VT));
> +
> +  // Floating-point multiply-add without intermediate rounding.
> +  bool HasFMA = ((!LegalOperations ||
> +                  TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
> +                 TLI.isFMAFasterThanFMulAndFAdd(VT) &&
> +                 UnsafeFPMath);
> +
> +  // No valid opcode, do not combine.
> +  if (!HasFMAD && !HasFMA)
> +    return SDValue();
> +
> +  // Always prefer FMAD to FMA for precision.
> +  unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
> +  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
> +  bool LookThroughFPExt = TLI.isFPExtFree(VT);
>
>    // fold (fadd (fmul x, y), z) -> (fma x, y, z)
>    if (N0.getOpcode() == ISD::FMUL &&
>        (Aggressive || N0->hasOneUse())) {
> -    return DAG.getNode(FusedOpcode, SDLoc(N), VT,
> +    return DAG.getNode(PreferredFusedOpcode, SL, VT,
>                         N0.getOperand(0), N0.getOperand(1), N1);
>    }
>
> @@ -7078,53 +7101,176 @@ static SDValue performFaddFmulCombines(u
>    // Note: Commutes FADD operands.
>    if (N1.getOpcode() == ISD::FMUL &&
>        (Aggressive || N1->hasOneUse())) {
> -    return DAG.getNode(FusedOpcode, SDLoc(N), VT,
> +    return DAG.getNode(PreferredFusedOpcode, SL, VT,
>                         N1.getOperand(0), N1.getOperand(1), N0);
>    }
>
> +  // Look through FP_EXTEND nodes to do more combining.
> +  if (UnsafeFPMath && LookThroughFPExt) {
> +    // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
> +    if (N0.getOpcode() == ISD::FP_EXTEND) {
> +      SDValue N00 = N0.getOperand(0);
> +      if (N00.getOpcode() == ISD::FMUL)
> +        return DAG.getNode(PreferredFusedOpcode, SL, VT,
> +                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
> +                                       N00.getOperand(0)),
> +                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
> +                                       N00.getOperand(1)), N1);
> +    }
> +
> +    // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
> +    // Note: Commutes FADD operands.
> +    if (N1.getOpcode() == ISD::FP_EXTEND) {
> +      SDValue N10 = N1.getOperand(0);
> +      if (N10.getOpcode() == ISD::FMUL)
> +        return DAG.getNode(PreferredFusedOpcode, SL, VT,
> +                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
> +                                       N10.getOperand(0)),
> +                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
> +                                       N10.getOperand(1)), N0);
> +    }
> +  }
> +
>    // More folding opportunities when target permits.
> -  if (Aggressive) {
> +  if (UnsafeFPMath && Aggressive) {
>      // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
> -    if (N0.getOpcode() == ISD::FMA &&
> +    if (N0.getOpcode() == PreferredFusedOpcode &&
>          N0.getOperand(2).getOpcode() == ISD::FMUL) {
> -      return DAG.getNode(FusedOpcode, SDLoc(N), VT,
> +      return DAG.getNode(PreferredFusedOpcode, SL, VT,
>                           N0.getOperand(0), N0.getOperand(1),
> -                         DAG.getNode(FusedOpcode, SDLoc(N), VT,
> +                         DAG.getNode(PreferredFusedOpcode, SL, VT,
>                                       N0.getOperand(2).getOperand(0),
>                                       N0.getOperand(2).getOperand(1),
>                                       N1));
>      }
>
>      // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
> -    if (N1->getOpcode() == ISD::FMA &&
> +    if (N1->getOpcode() == PreferredFusedOpcode &&
>          N1.getOperand(2).getOpcode() == ISD::FMUL) {
> -      return DAG.getNode(FusedOpcode, SDLoc(N), VT,
> +      return DAG.getNode(PreferredFusedOpcode, SL, VT,
>                           N1.getOperand(0), N1.getOperand(1),
> -                         DAG.getNode(FusedOpcode, SDLoc(N), VT,
> +                         DAG.getNode(PreferredFusedOpcode, SL, VT,
>                                       N1.getOperand(2).getOperand(0),
>                                       N1.getOperand(2).getOperand(1),
>                                       N0));
>      }
> +
> +    if (LookThroughFPExt) {
> +      // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
> +      //   -> (fma x, y, (fma (fpext u), (fpext v), z))
> +      auto FoldFAddFMAFPExtFMul = [&] (
> +          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
> +        return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
> +                           DAG.getNode(PreferredFusedOpcode, SL, VT,
> +                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
> +                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
> +                                       Z));
> +      };
> +      if (N0.getOpcode() == PreferredFusedOpcode) {
> +        SDValue N02 = N0.getOperand(2);
> +        if (N02.getOpcode() == ISD::FP_EXTEND) {
> +          SDValue N020 = N02.getOperand(0);
> +          if (N020.getOpcode() == ISD::FMUL)
> +            return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
> +                                        N020.getOperand(0), N020.getOperand(1),
> +                                        N1);
> +        }
> +      }
> +
> +      // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
> +      //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
> +      // FIXME: This turns two single-precision and one double-precision
> +      // operation into two double-precision operations, which might not be
> +      // interesting for all targets, especially GPUs.
> +      auto FoldFAddFPExtFMAFMul = [&] (
> +          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
> +        return DAG.getNode(PreferredFusedOpcode, SL, VT,
> +                           DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
> +                           DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
> +                           DAG.getNode(PreferredFusedOpcode, SL, VT,
> +                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
> +                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
> +                                       Z));
> +      };
> +      if (N0.getOpcode() == ISD::FP_EXTEND) {
> +        SDValue N00 = N0.getOperand(0);
> +        if (N00.getOpcode() == PreferredFusedOpcode) {
> +          SDValue N002 = N00.getOperand(2);
> +          if (N002.getOpcode() == ISD::FMUL)
> +            return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
> +                                        N002.getOperand(0), N002.getOperand(1),
> +                                        N1);
> +        }
> +      }
> +
> +      // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
> +      //   -> (fma y, z, (fma (fpext u), (fpext v), x))
> +      if (N1.getOpcode() == PreferredFusedOpcode) {
> +        SDValue N12 = N1.getOperand(2);
> +        if (N12.getOpcode() == ISD::FP_EXTEND) {
> +          SDValue N120 = N12.getOperand(0);
> +          if (N120.getOpcode() == ISD::FMUL)
> +            return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
> +                                        N120.getOperand(0), N120.getOperand(1),
> +                                        N0);
> +        }
> +      }
> +
> +      // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
> +      //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
> +      // FIXME: This turns two single-precision and one double-precision
> +      // operation into two double-precision operations, which might not be
> +      // interesting for all targets, especially GPUs.
> +      if (N1.getOpcode() == ISD::FP_EXTEND) {
> +        SDValue N10 = N1.getOperand(0);
> +        if (N10.getOpcode() == PreferredFusedOpcode) {
> +          SDValue N102 = N10.getOperand(2);
> +          if (N102.getOpcode() == ISD::FMUL)
> +            return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
> +                                        N102.getOperand(0), N102.getOperand(1),
> +                                        N0);
> +        }
> +      }
> +    }
>    }
>
>    return SDValue();
>  }
>
> -static SDValue performFsubFmulCombines(unsigned FusedOpcode,
> -                                       bool Aggressive,
> -                                       SDNode *N,
> -                                       const TargetLowering &TLI,
> -                                       SelectionDAG &DAG) {
> +/// Try to perform FMA combining on a given FSUB node.
> +SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
>    SDValue N0 = N->getOperand(0);
>    SDValue N1 = N->getOperand(1);
>    EVT VT = N->getValueType(0);
> -
>    SDLoc SL(N);
>
> +  const TargetOptions &Options = DAG.getTarget().Options;
> +  bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
> +                       Options.UnsafeFPMath);
> +
> +  // Floating-point multiply-add with intermediate rounding.
> +  bool HasFMAD = (LegalOperations &&
> +                  TLI.isOperationLegal(ISD::FMAD, VT));
> +
> +  // Floating-point multiply-add without intermediate rounding.
> +  bool HasFMA = ((!LegalOperations ||
> +                  TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
> +                 TLI.isFMAFasterThanFMulAndFAdd(VT) &&
> +                 UnsafeFPMath);
> +
> +  // No valid opcode, do not combine.
> +  if (!HasFMAD && !HasFMA)
> +    return SDValue();
> +
> +  // Always prefer FMAD to FMA for precision.
> +  unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
> +  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
> +  bool LookThroughFPExt = TLI.isFPExtFree(VT);
> +
>    // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
>    if (N0.getOpcode() == ISD::FMUL &&
>        (Aggressive || N0->hasOneUse())) {
> -    return DAG.getNode(FusedOpcode, SL, VT,
> +    return DAG.getNode(PreferredFusedOpcode, SL, VT,
>                         N0.getOperand(0), N0.getOperand(1),
>                         DAG.getNode(ISD::FNEG, SL, VT, N1));
>    }
> @@ -7133,7 +7279,7 @@ static SDValue performFsubFmulCombines(u
>    // Note: Commutes FSUB operands.
>    if (N1.getOpcode() == ISD::FMUL &&
>        (Aggressive || N1->hasOneUse()))
> -    return DAG.getNode(FusedOpcode, SL, VT,
> +    return DAG.getNode(PreferredFusedOpcode, SL, VT,
>                         DAG.getNode(ISD::FNEG, SL, VT,
>                                     N1.getOperand(0)),
>                         N1.getOperand(1), N0);
> @@ -7144,41 +7290,213 @@ static SDValue performFsubFmulCombines(u
>        (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
>      SDValue N00 = N0.getOperand(0).getOperand(0);
>      SDValue N01 = N0.getOperand(0).getOperand(1);
> -    return DAG.getNode(FusedOpcode, SL, VT,
> +    return DAG.getNode(PreferredFusedOpcode, SL, VT,
>                         DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
>                         DAG.getNode(ISD::FNEG, SL, VT, N1));
>    }
>
> +  // Look through FP_EXTEND nodes to do more combining.
> +  if (UnsafeFPMath && LookThroughFPExt) {
> +    // fold (fsub (fpext (fmul x, y)), z)
> +    //   -> (fma (fpext x), (fpext y), (fneg z))
> +    if (N0.getOpcode() == ISD::FP_EXTEND) {
> +      SDValue N00 = N0.getOperand(0);
> +      if (N00.getOpcode() == ISD::FMUL)
> +        return DAG.getNode(PreferredFusedOpcode, SL, VT,
> +                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
> +                                       N00.getOperand(0)),
> +                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
> +                                       N00.getOperand(1)),
> +                           DAG.getNode(ISD::FNEG, SL, VT, N1));
> +    }
> +
> +    // fold (fsub x, (fpext (fmul y, z)))
> +    //   -> (fma (fneg (fpext y)), (fpext z), x)
> +    // Note: Commutes FSUB operands.
> +    if (N1.getOpcode() == ISD::FP_EXTEND) {
> +      SDValue N10 = N1.getOperand(0);
> +      if (N10.getOpcode() == ISD::FMUL)
> +        return DAG.getNode(PreferredFusedOpcode, SL, VT,
> +                           DAG.getNode(ISD::FNEG, SL, VT,
> +                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
> +                                                   N10.getOperand(0))),
> +                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
> +                                       N10.getOperand(1)),
> +                           N0);
> +    }
> +
> +    // fold (fsub (fpext (fneg (fmul, x, y))), z)
> +    //   -> (fneg (fma (fpext x), (fpext y), z))
> +    // Note: This could be removed with appropriate canonicalization of the
> +    // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
> +    // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
> +    // from implementing the canonicalization in visitFSUB.
> +    if (N0.getOpcode() == ISD::FP_EXTEND) {
> +      SDValue N00 = N0.getOperand(0);
> +      if (N00.getOpcode() == ISD::FNEG) {
> +        SDValue N000 = N00.getOperand(0);
> +        if (N000.getOpcode() == ISD::FMUL) {
> +          return DAG.getNode(ISD::FNEG, SL, VT,
> +                             DAG.getNode(PreferredFusedOpcode, SL, VT,
> +                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
> +                                                     N000.getOperand(0)),
> +                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
> +                                                     N000.getOperand(1)),
> +                                         N1));
> +        }
> +      }
> +    }
> +
> +    // fold (fsub (fneg (fpext (fmul, x, y))), z)
> +    //   -> (fneg (fma (fpext x)), (fpext y), z)
> +    // Note: This could be removed with appropriate canonicalization of the
> +    // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
> +    // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
> +    // from implementing the canonicalization in visitFSUB.
> +    if (N0.getOpcode() == ISD::FNEG) {
> +      SDValue N00 = N0.getOperand(0);
> +      if (N00.getOpcode() == ISD::FP_EXTEND) {
> +        SDValue N000 = N00.getOperand(0);
> +        if (N000.getOpcode() == ISD::FMUL) {
> +          return DAG.getNode(ISD::FNEG, SL, VT,
> +                             DAG.getNode(PreferredFusedOpcode, SL, VT,
> +                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
> +                                                     N000.getOperand(0)),
> +                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
> +                                                     N000.getOperand(1)),
> +                                         N1));
> +        }
> +      }
> +    }
> +
> +  }
> +
>    // More folding opportunities when target permits.
> -  if (Aggressive) {
> +  if (UnsafeFPMath && Aggressive) {
>      // fold (fsub (fma x, y, (fmul u, v)), z)
>      //   -> (fma x, y (fma u, v, (fneg z)))
> -    if (N0.getOpcode() == FusedOpcode &&
> +    if (N0.getOpcode() == PreferredFusedOpcode &&
>          N0.getOperand(2).getOpcode() == ISD::FMUL) {
> -      return DAG.getNode(FusedOpcode, SDLoc(N), VT,
> +      return DAG.getNode(PreferredFusedOpcode, SL, VT,
>                           N0.getOperand(0), N0.getOperand(1),
> -                         DAG.getNode(FusedOpcode, SDLoc(N), VT,
> +                         DAG.getNode(PreferredFusedOpcode, SL, VT,
>                                       N0.getOperand(2).getOperand(0),
>                                       N0.getOperand(2).getOperand(1),
> -                                     DAG.getNode(ISD::FNEG, SDLoc(N), VT,
> +                                     DAG.getNode(ISD::FNEG, SL, VT,
>                                                   N1)));
>      }
>
>      // fold (fsub x, (fma y, z, (fmul u, v)))
>      //   -> (fma (fneg y), z, (fma (fneg u), v, x))
> -    if (N1.getOpcode() == FusedOpcode &&
> +    if (N1.getOpcode() == PreferredFusedOpcode &&
>          N1.getOperand(2).getOpcode() == ISD::FMUL) {
>        SDValue N20 = N1.getOperand(2).getOperand(0);
>        SDValue N21 = N1.getOperand(2).getOperand(1);
> -      return DAG.getNode(FusedOpcode, SDLoc(N), VT,
> -                         DAG.getNode(ISD::FNEG, SDLoc(N), VT,
> +      return DAG.getNode(PreferredFusedOpcode, SL, VT,
> +                         DAG.getNode(ISD::FNEG, SL, VT,
>                                       N1.getOperand(0)),
>                           N1.getOperand(1),
> -                         DAG.getNode(FusedOpcode, SDLoc(N), VT,
> -                                     DAG.getNode(ISD::FNEG, SDLoc(N),  VT,
> -                                                 N20),
> +                         DAG.getNode(PreferredFusedOpcode, SL, VT,
> +                                     DAG.getNode(ISD::FNEG, SL, VT, N20),
>                                       N21, N0));
>      }
> +
> +    if (LookThroughFPExt) {
> +      // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
> +      //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
> +      if (N0.getOpcode() == PreferredFusedOpcode) {
> +        SDValue N02 = N0.getOperand(2);
> +        if (N02.getOpcode() == ISD::FP_EXTEND) {
> +          SDValue N020 = N02.getOperand(0);
> +          if (N020.getOpcode() == ISD::FMUL)
> +            return DAG.getNode(PreferredFusedOpcode, SL, VT,
> +                               N0.getOperand(0), N0.getOperand(1),
> +                               DAG.getNode(PreferredFusedOpcode, SL, VT,
> +                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
> +                                                       N020.getOperand(0)),
> +                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
> +                                                       N020.getOperand(1)),
> +                                           DAG.getNode(ISD::FNEG, SL, VT,
> +                                                       N1)));
> +        }
> +      }
> +
> +      // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
> +      //   -> (fma (fpext x), (fpext y),
> +      //           (fma (fpext u), (fpext v), (fneg z)))
> +      // FIXME: This turns two single-precision and one double-precision
> +      // operation into two double-precision operations, which might not be
> +      // interesting for all targets, especially GPUs.
> +      if (N0.getOpcode() == ISD::FP_EXTEND) {
> +        SDValue N00 = N0.getOperand(0);
> +        if (N00.getOpcode() == PreferredFusedOpcode) {
> +          SDValue N002 = N00.getOperand(2);
> +          if (N002.getOpcode() == ISD::FMUL)
> +            return DAG.getNode(PreferredFusedOpcode, SL, VT,
> +                               DAG.getNode(ISD::FP_EXTEND, SL, VT,
> +                                           N00.getOperand(0)),
> +                               DAG.getNode(ISD::FP_EXTEND, SL, VT,
> +                                           N00.getOperand(1)),
> +                               DAG.getNode(PreferredFusedOpcode, SL, VT,
> +                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
> +                                                       N002.getOperand(0)),
> +                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
> +                                                       N002.getOperand(1)),
> +                                           DAG.getNode(ISD::FNEG, SL, VT,
> +                                                       N1)));
> +        }
> +      }
> +
> +      // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
> +      //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
> +      if (N1.getOpcode() == PreferredFusedOpcode &&
> +        N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
> +        SDValue N120 = N1.getOperand(2).getOperand(0);
> +        if (N120.getOpcode() == ISD::FMUL) {
> +          SDValue N1200 = N120.getOperand(0);
> +          SDValue N1201 = N120.getOperand(1);
> +          return DAG.getNode(PreferredFusedOpcode, SL, VT,
> +                             DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
> +                             N1.getOperand(1),
> +                             DAG.getNode(PreferredFusedOpcode, SL, VT,
> +                                         DAG.getNode(ISD::FNEG, SL, VT,
> +                                             DAG.getNode(ISD::FP_EXTEND, SL,
> +                                                         VT, N1200)),
> +                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
> +                                                     N1201),
> +                                         N0));
> +        }
> +      }
> +
> +      // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
> +      //   -> (fma (fneg (fpext y)), (fpext z),
> +      //           (fma (fneg (fpext u)), (fpext v), x))
> +      // FIXME: This turns two single-precision and one double-precision
> +      // operation into two double-precision operations, which might not be
> +      // interesting for all targets, especially GPUs.
> +      if (N1.getOpcode() == ISD::FP_EXTEND &&
> +        N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
> +        SDValue N100 = N1.getOperand(0).getOperand(0);
> +        SDValue N101 = N1.getOperand(0).getOperand(1);
> +        SDValue N102 = N1.getOperand(0).getOperand(2);
> +        if (N102.getOpcode() == ISD::FMUL) {
> +          SDValue N1020 = N102.getOperand(0);
> +          SDValue N1021 = N102.getOperand(1);
> +          return DAG.getNode(PreferredFusedOpcode, SL, VT,
> +                             DAG.getNode(ISD::FNEG, SL, VT,
> +                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
> +                                                     N100)),
> +                             DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
> +                             DAG.getNode(PreferredFusedOpcode, SL, VT,
> +                                         DAG.getNode(ISD::FNEG, SL, VT,
> +                                             DAG.getNode(ISD::FP_EXTEND, SL,
> +                                                         VT, N1020)),
> +                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
> +                                                     N1021),
> +                                         N0));
> +        }
> +      }
> +    }
>    }
>
>    return SDValue();
> @@ -7322,55 +7640,11 @@ SDValue DAGCombiner::visitFADD(SDNode *N
>      }
>    } // enable-unsafe-fp-math
>
> -  if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
> -    // Assume if there is an fmad instruction that it should be aggressively
> -    // used.
> -    if (SDValue Fused = performFaddFmulCombines(ISD::FMAD, true, N, TLI, DAG))
> -      return Fused;
> -  }
> -
>    // FADD -> FMA combines:
> -  if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
> -      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
> -      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
> -
> -    if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
> -      // Don't form FMA if we are preferring FMAD.
> -      if (SDValue Fused
> -          = performFaddFmulCombines(ISD::FMA,
> -                                    TLI.enableAggressiveFMAFusion(VT),
> -                                    N, TLI, DAG)) {
> -        return Fused;
> -      }
> -    }
> -
> -    // When FP_EXTEND nodes are free on the target, and there is an opportunity
> -    // to combine into FMA, arrange such nodes accordingly.
> -    if (TLI.isFPExtFree(VT)) {
> -
> -      // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
> -      if (N0.getOpcode() == ISD::FP_EXTEND) {
> -        SDValue N00 = N0.getOperand(0);
> -        if (N00.getOpcode() == ISD::FMUL)
> -          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
> -                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
> -                                         N00.getOperand(0)),
> -                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
> -                                         N00.getOperand(1)), N1);
> -      }
> -
> -      // fold (fadd x, (fpext (fmul y, z)), z) -> (fma (fpext y), (fpext z), x)
> -      // Note: Commutes FADD operands.
> -      if (N1.getOpcode() == ISD::FP_EXTEND) {
> -        SDValue N10 = N1.getOperand(0);
> -        if (N10.getOpcode() == ISD::FMUL)
> -          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
> -                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
> -                                         N10.getOperand(0)),
> -                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
> -                                         N10.getOperand(1)), N0);
> -      }
> -    }
> +  SDValue Fused = visitFADDForFMACombine(N);
> +  if (Fused) {
> +    AddToWorklist(Fused.getNode());
> +    return Fused;
>    }
>
>    return SDValue();
> @@ -7431,96 +7705,11 @@ SDValue DAGCombiner::visitFSUB(SDNode *N
>      }
>    }
>
> -  if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
> -    // Assume if there is an fmad instruction that it should be aggressively
> -    // used.
> -    if (SDValue Fused = performFsubFmulCombines(ISD::FMAD, true, N, TLI, DAG))
> -      return Fused;
> -  }
> -
>    // FSUB -> FMA combines:
> -  if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
> -      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
> -      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
> -
> -    if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
> -      // Don't form FMA if we are preferring FMAD.
> -
> -      if (SDValue Fused
> -          = performFsubFmulCombines(ISD::FMA,
> -                                    TLI.enableAggressiveFMAFusion(VT),
> -                                    N, TLI, DAG)) {
> -        return Fused;
> -      }
> -    }
> -
> -    // When FP_EXTEND nodes are free on the target, and there is an opportunity
> -    // to combine into FMA, arrange such nodes accordingly.
> -    if (TLI.isFPExtFree(VT)) {
> -      // fold (fsub (fpext (fmul x, y)), z)
> -      //   -> (fma (fpext x), (fpext y), (fneg z))
> -      if (N0.getOpcode() == ISD::FP_EXTEND) {
> -        SDValue N00 = N0.getOperand(0);
> -        if (N00.getOpcode() == ISD::FMUL)
> -          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
> -                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
> -                                         N00.getOperand(0)),
> -                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
> -                                         N00.getOperand(1)),
> -                             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));
> -      }
> -
> -      // fold (fsub x, (fpext (fmul y, z)))
> -      //   -> (fma (fneg (fpext y)), (fpext z), x)
> -      // Note: Commutes FSUB operands.
> -      if (N1.getOpcode() == ISD::FP_EXTEND) {
> -        SDValue N10 = N1.getOperand(0);
> -        if (N10.getOpcode() == ISD::FMUL)
> -          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
> -                             DAG.getNode(ISD::FNEG, SDLoc(N), VT,
> -                                         DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
> -                                                     VT, N10.getOperand(0))),
> -                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
> -                                         N10.getOperand(1)),
> -                             N0);
> -      }
> -
> -      // fold (fsub (fpext (fneg (fmul, x, y))), z)
> -      //   -> (fma (fneg (fpext x)), (fpext y), (fneg z))
> -      if (N0.getOpcode() == ISD::FP_EXTEND) {
> -        SDValue N00 = N0.getOperand(0);
> -        if (N00.getOpcode() == ISD::FNEG) {
> -          SDValue N000 = N00.getOperand(0);
> -          if (N000.getOpcode() == ISD::FMUL) {
> -            return DAG.getNode(ISD::FMA, dl, VT,
> -                               DAG.getNode(ISD::FNEG, dl, VT,
> -                                           DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
> -                                                       VT, N000.getOperand(0))),
> -                               DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
> -                                           N000.getOperand(1)),
> -                               DAG.getNode(ISD::FNEG, dl, VT, N1));
> -          }
> -        }
> -      }
> -
> -      // fold (fsub (fneg (fpext (fmul, x, y))), z)
> -      //   -> (fma (fneg (fpext x)), (fpext y), (fneg z))
> -      if (N0.getOpcode() == ISD::FNEG) {
> -        SDValue N00 = N0.getOperand(0);
> -        if (N00.getOpcode() == ISD::FP_EXTEND) {
> -          SDValue N000 = N00.getOperand(0);
> -          if (N000.getOpcode() == ISD::FMUL) {
> -            return DAG.getNode(ISD::FMA, dl, VT,
> -                               DAG.getNode(ISD::FNEG, dl, VT,
> -                                           DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
> -                                           VT, N000.getOperand(0))),
> -                               DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
> -                                           N000.getOperand(1)),
> -                               DAG.getNode(ISD::FNEG, dl, VT, N1));
> -          }
> -        }
> -      }
> -    }
> +  SDValue Fused = visitFSUBForFMACombine(N);
> +  if (Fused) {
> +    AddToWorklist(Fused.getNode());
> +    return Fused;
>    }
>
>    return SDValue();
>
> Modified: llvm/trunk/test/CodeGen/PowerPC/fma-assoc.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/fma-assoc.ll?rev=234513&r1=234512&r2=234513&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/fma-assoc.ll (original)
> +++ llvm/trunk/test/CodeGen/PowerPC/fma-assoc.ll Thu Apr  9 12:55:26 2015
> @@ -77,3 +77,159 @@ define double @test_FMSUB_ASSOC2(double
>  ; CHECK-VSX-NEXT: blr
>  }
>
> +define double @test_FMADD_ASSOC_EXT1(float %A, float %B, double %C,
> +                                 double %D, double %E) {
> +       %F = fmul float %A, %B         ; <float> [#uses=1]
> +       %G = fpext float %F to double   ; <double> [#uses=1]
> +       %H = fmul double %C, %D         ; <double> [#uses=1]
> +       %I = fadd double %H, %G         ; <double> [#uses=1]
> +       %J = fadd double %I, %E         ; <double> [#uses=1]
> +       ret double %J
> +; CHECK-LABEL: test_FMADD_ASSOC_EXT1:
> +; CHECK: fmadd
> +; CHECK-NEXT: fmadd
> +; CHECK-NEXT: blr
> +
> +; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT1:
> +; CHECK-VSX: xsmaddmdp
> +; CHECK-VSX-NEXT: xsmaddadp
> +; CHECK-VSX-NEXT: blr
> +}
> +
> +define double @test_FMADD_ASSOC_EXT2(float %A, float %B, float %C,
> +                                 float %D, double %E) {
> +       %F = fmul float %A, %B         ; <float> [#uses=1]
> +       %G = fmul float %C, %D         ; <float> [#uses=1]
> +       %H = fadd float %F, %G         ; <float> [#uses=1]
> +       %I = fpext float %H to double   ; <double> [#uses=1]
> +       %J = fadd double %I, %E         ; <double> [#uses=1]
> +       ret double %J
> +; CHECK-LABEL: test_FMADD_ASSOC_EXT2:
> +; CHECK: fmadd
> +; CHECK-NEXT: fmadd
> +; CHECK-NEXT: blr
> +
> +; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT2:
> +; CHECK-VSX: xsmaddmdp
> +; CHECK-VSX-NEXT: xsmaddadp
> +; CHECK-VSX-NEXT: fmr
> +; CHECK-VSX-NEXT: blr
> +}
> +
> +define double @test_FMADD_ASSOC_EXT3(float %A, float %B, double %C,
> +                                 double %D, double %E) {
> +       %F = fmul float %A, %B          ; <float> [#uses=1]
> +       %G = fpext float %F to double   ; <double> [#uses=1]
> +       %H = fmul double %C, %D         ; <double> [#uses=1]
> +       %I = fadd double %H, %G         ; <double> [#uses=1]
> +       %J = fadd double %E, %I         ; <double> [#uses=1]
> +       ret double %J
> +; CHECK-LABEL: test_FMADD_ASSOC_EXT3:
> +; CHECK: fmadd
> +; CHECK-NEXT: fmadd
> +; CHECK-NEXT: blr
> +
> +; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT3:
> +; CHECK-VSX: xsmaddmdp
> +; CHECK-VSX-NEXT: xsmaddadp
> +; CHECK-VSX-NEXT: blr
> +}
> +
> +define double @test_FMADD_ASSOC_EXT4(float %A, float %B, float %C,
> +                                 float %D, double %E) {
> +       %F = fmul float %A, %B          ; <float> [#uses=1]
> +       %G = fmul float %C, %D          ; <float> [#uses=1]
> +       %H = fadd float %F, %G          ; <float> [#uses=1]
> +       %I = fpext float %H to double   ; <double> [#uses=1]
> +       %J = fadd double %E, %I         ; <double> [#uses=1]
> +       ret double %J
> +; CHECK-LABEL: test_FMADD_ASSOC_EXT4:
> +; CHECK: fmadd
> +; CHECK-NEXT: fmadd
> +; CHECK-NEXT: blr
> +
> +; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT4:
> +; CHECK-VSX: xsmaddmdp
> +; CHECK-VSX-NEXT: xsmaddadp
> +; CHECK-VSX-NEXT: fmr
> +; CHECK-VSX-NEXT: blr
> +}
> +
> +define double @test_FMSUB_ASSOC_EXT1(float %A, float %B, double %C,
> +                                 double %D, double %E) {
> +       %F = fmul float %A, %B          ; <float> [#uses=1]
> +       %G = fpext float %F to double   ; <double> [#uses=1]
> +       %H = fmul double %C, %D         ; <double> [#uses=1]
> +       %I = fadd double %H, %G         ; <double> [#uses=1]
> +       %J = fsub double %I, %E         ; <double> [#uses=1]
> +       ret double %J
> +; CHECK-LABEL: test_FMSUB_ASSOC_EXT1:
> +; CHECK: fmsub
> +; CHECK-NEXT: fmadd
> +; CHECK-NEXT: blr
> +
> +; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT1:
> +; CHECK-VSX: xsmsubmdp
> +; CHECK-VSX-NEXT: xsmaddadp
> +; CHECK-VSX-NEXT: blr
> +}
> +
> +define double @test_FMSUB_ASSOC_EXT2(float %A, float %B, float %C,
> +                                 float %D, double %E) {
> +       %F = fmul float %A, %B          ; <float> [#uses=1]
> +       %G = fmul float %C, %D          ; <float> [#uses=1]
> +       %H = fadd float %F, %G          ; <float> [#uses=1]
> +       %I = fpext float %H to double   ; <double> [#uses=1]
> +       %J = fsub double %I, %E         ; <double> [#uses=1]
> +       ret double %J
> +; CHECK-LABEL: test_FMSUB_ASSOC_EXT2:
> +; CHECK: fmsub
> +; CHECK-NEXT: fmadd
> +; CHECK-NEXT: blr
> +
> +; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT2:
> +; CHECK-VSX: xsmsubmdp
> +; CHECK-VSX-NEXT: xsmaddadp
> +; CHECK-VSX-NEXT: fmr
> +; CHECK-VSX-NEXT: blr
> +}
> +
> +define double @test_FMSUB_ASSOC_EXT3(float %A, float %B, double %C,
> +                                 double %D, double %E) {
> +       %F = fmul float %A, %B          ; <float> [#uses=1]
> +       %G = fpext float %F to double   ; <double> [#uses=1]
> +       %H = fmul double %C, %D         ; <double> [#uses=1]
> +       %I = fadd double %H, %G         ; <double> [#uses=1]
> +       %J = fsub double %E, %I         ; <double> [#uses=1]
> +       ret double %J
> +; CHECK-LABEL: test_FMSUB_ASSOC_EXT3:
> +; CHECK: fnmsub
> +; CHECK-NEXT: fnmsub
> +; CHECK-NEXT: blr
> +
> +; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT3:
> +; CHECK-VSX: xsnmsubmdp
> +; CHECK-VSX-NEXT: xsnmsubadp
> +; CHECK-VSX-NEXT: fmr
> +; CHECK-VSX-NEXT: blr
> +}
> +
> +define double @test_FMSUB_ASSOC_EXT4(float %A, float %B, float %C,
> +                                 float %D, double %E) {
> +       %F = fmul float %A, %B          ; <float> [#uses=1]
> +       %G = fmul float %C, %D          ; <float> [#uses=1]
> +       %H = fadd float %F, %G          ; <float> [#uses=1]
> +       %I = fpext float %H to double   ; <double> [#uses=1]
> +       %J = fsub double %E, %I         ; <double> [#uses=1]
> +       ret double %J
> +; CHECK-LABEL: test_FMSUB_ASSOC_EXT4:
> +; CHECK: fnmsub
> +; CHECK-NEXT: fnmsub
> +; CHECK-NEXT: blr
> +
> +; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT4:
> +; CHECK-VSX: xsnmsubmdp
> +; CHECK-VSX-NEXT: xsnmsubadp
> +; CHECK-VSX-NEXT: fmr
> +; CHECK-VSX-NEXT: blr
> +}
> \ No newline at end of file
>
> Modified: llvm/trunk/test/CodeGen/PowerPC/fma-ext.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/fma-ext.ll?rev=234513&r1=234512&r2=234513&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/fma-ext.ll (original)
> +++ llvm/trunk/test/CodeGen/PowerPC/fma-ext.ll Thu Apr  9 12:55:26 2015
> @@ -65,13 +65,11 @@ define double @test_FMSUB_EXT3(float %A,
>      %G = fsub double %F, %C         ; <double> [#uses=1]
>      ret double %G
>  ; CHECK-LABEL: test_FMSUB_EXT3:
> -; CHECK: fneg
> -; CHECK-NEXT: fmsub
> +; CHECK: fnmadd
>  ; CHECK-NEXT: blr
>
>  ; CHECK-VSX-LABEL: test_FMSUB_EXT3:
> -; CHECK-VSX: xsnegdp
> -; CHECK-VSX-NEXT: xsmsubmdp
> +; CHECK-VSX: xsnmaddmdp
>  ; CHECK-VSX-NEXT: blr
>  }
>
> @@ -82,12 +80,10 @@ define double @test_FMSUB_EXT4(float %A,
>      %G = fsub double %F, %C         ; <double> [#uses=1]
>      ret double %G
>  ; CHECK-LABEL: test_FMSUB_EXT4:
> -; CHECK: fneg
> -; CHECK-NEXT: fmsub
> +; CHECK: fnmadd
>  ; CHECK-NEXT: blr
>
>  ; CHECK-VSX-LABEL: test_FMSUB_EXT4:
> -; CHECK-VSX: xsnegdp
> -; CHECK-VSX-NEXT: xsmsubmdp
> +; CHECK-VSX: xsnmaddmdp
>  ; CHECK-VSX-NEXT: blr
> -}
> \ No newline at end of file
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits


