[llvm-commits] [llvm] r162956 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/fp-fast.ll
Hal Finkel
hfinkel at anl.gov
Thu Aug 30 16:55:35 PDT 2012
On Thu, 30 Aug 2012 23:35:16 -0000
Owen Anderson <resistor at mac.com> wrote:
> Author: resistor
> Date: Thu Aug 30 18:35:16 2012
> New Revision: 162956
>
> URL: http://llvm.org/viewvc/llvm-project?rev=162956&view=rev
> Log:
> Teach the DAG combiner to turn chains of FADDs (x+x+x+x+...) into
> FMULs by constants. This is only enabled in unsafe FP math mode,
> since it does not preserve rounding effects for all such constants.
Are there some constants for which this is always safe (e.g., powers of two)?
-Hal
>
> Added:
> llvm/trunk/test/CodeGen/X86/fp-fast.ll
> Modified:
> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=162956&r1=162955&r2=162956&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Aug 30 18:35:16 2012
> @@ -5681,6 +5681,127 @@
> DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(1), N1));
>
> + // In unsafe math mode, we can fold chains of FADD's of the same value
> + // into multiplications. This transform is not safe in general because
> + // we are reducing the number of rounding steps.
> + if (DAG.getTarget().Options.UnsafeFPMath &&
> + TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
> + !N0CFP && !N1CFP) {
> + if (N0.getOpcode() == ISD::FMUL) {
> + ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
> + ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
> +
> + // (fadd (fmul c, x), x) -> (fmul c+1, x)
> + if (CFP00 && !CFP01 && N0.getOperand(1) == N1) {
> + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
> + SDValue(CFP00, 0),
> + DAG.getConstantFP(1.0, VT));
> + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
> + N1, NewCFP);
> + }
> +
> + // (fadd (fmul x, c), x) -> (fmul c+1, x)
> + if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
> + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
> + SDValue(CFP01, 0),
> + DAG.getConstantFP(1.0, VT));
> + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
> + N1, NewCFP);
> + }
> +
> + // (fadd (fadd x, x), x) -> (fmul 3.0, x)
> + if (!CFP00 && !CFP01 && N0.getOperand(0) == N0.getOperand(1) &&
> + N0.getOperand(0) == N1) {
> + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
> + N1, DAG.getConstantFP(3.0, VT));
> + }
> +
> + // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x)
> + if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD &&
> + N1.getOperand(0) == N1.getOperand(1) &&
> + N0.getOperand(1) == N1.getOperand(0)) {
> + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
> + SDValue(CFP00, 0),
> + DAG.getConstantFP(2.0, VT));
> + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
> + N0.getOperand(1), NewCFP);
> + }
> +
> + // (fadd (fmul x, c), (fadd x, x)) -> (fmul c+2, x)
> + if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
> + N1.getOperand(0) == N1.getOperand(1) &&
> + N0.getOperand(0) == N1.getOperand(0)) {
> + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
> + SDValue(CFP01, 0),
> + DAG.getConstantFP(2.0, VT));
> + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
> + N0.getOperand(0), NewCFP);
> + }
> + }
> +
> + if (N1.getOpcode() == ISD::FMUL) {
> + ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
> + ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
> +
> + // (fadd x, (fmul c, x)) -> (fmul c+1, x)
> + if (CFP10 && !CFP11 && N1.getOperand(1) == N0) {
> + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
> + SDValue(CFP10, 0),
> + DAG.getConstantFP(1.0, VT));
> + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
> + N0, NewCFP);
> + }
> +
> + // (fadd x, (fmul x, c)) -> (fmul c+1, x)
> + if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
> + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
> + SDValue(CFP11, 0),
> + DAG.getConstantFP(1.0, VT));
> + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
> + N0, NewCFP);
> + }
> +
> + // (fadd x, (fadd x, x)) -> (fmul 3.0, x)
> + if (!CFP10 && !CFP11 && N1.getOperand(0) == N1.getOperand(1) &&
> + N1.getOperand(0) == N0) {
> + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
> + N0, DAG.getConstantFP(3.0, VT));
> + }
> +
> + // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x)
> + if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD &&
> + N1.getOperand(0) == N1.getOperand(1) &&
> + N0.getOperand(1) == N1.getOperand(0)) {
> + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
> + SDValue(CFP10, 0),
> + DAG.getConstantFP(2.0, VT));
> + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
> + N0.getOperand(1), NewCFP);
> + }
> +
> + // (fadd (fadd x, x), (fmul x, c)) -> (fmul c+2, x)
> + if (CFP11 && !CFP10 && N1.getOpcode() == ISD::FADD &&
> + N1.getOperand(0) == N1.getOperand(1) &&
> + N0.getOperand(0) == N1.getOperand(0)) {
> + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
> + SDValue(CFP11, 0),
> + DAG.getConstantFP(2.0, VT));
> + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
> + N0.getOperand(0), NewCFP);
> + }
> + }
> +
> + // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x)
> + if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
> + N0.getOperand(0) == N0.getOperand(1) &&
> + N1.getOperand(0) == N1.getOperand(1) &&
> + N0.getOperand(0) == N1.getOperand(0)) {
> + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
> + N0.getOperand(0),
> + DAG.getConstantFP(4.0, VT));
> + }
> + }
> +
> // FADD -> FMA combines:
> if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
> DAG.getTarget().Options.UnsafeFPMath) &&
> @@ -5692,7 +5813,7 @@
> return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
> N0.getOperand(0), N0.getOperand(1), N1);
> }
> -
> +
> // fold (fadd x, (fmul y, z)) -> (fma x, y, z)
> // Note: Commutes FADD operands.
> if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
>
> Added: llvm/trunk/test/CodeGen/X86/fp-fast.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp-fast.ll?rev=162956&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/fp-fast.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/fp-fast.ll Thu Aug 30 18:35:16 2012
> @@ -0,0 +1,37 @@
> +; RUN: llc -march=x86-64 -mtriple=x86_64-apple-darwin -enable-unsafe-fp-math < %s | FileCheck %s
> +
> +; CHECK: test1
> +define float @test1(float %a) {
> +; CHECK-NOT: vaddss
> +; CHECK: vmulss
> +; CHECK-NOT: vaddss
> +; CHECK: ret
> + %t1 = fadd float %a, %a
> + %r = fadd float %t1, %t1
> + ret float %r
> +}
> +
> +; CHECK: test2
> +define float @test2(float %a) {
> +; CHECK-NOT: vaddss
> +; CHECK: vmulss
> +; CHECK-NOT: vaddss
> +; CHECK: ret
> + %t1 = fmul float 4.0, %a
> + %t2 = fadd float %a, %a
> + %r = fadd float %t1, %t2
> + ret float %r
> +}
> +
> +; CHECK: test3
> +define float @test3(float %a) {
> +; CHECK-NOT: vaddss
> +; CHECK: vxorps
> +; CHECK-NOT: vaddss
> +; CHECK: ret
> + %t1 = fmul float 2.0, %a
> + %t2 = fadd float %a, %a
> + %r = fsub float %t1, %t2
> + ret float %r
> +}
> +
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
--
Hal Finkel
Postdoctoral Appointee
Leadership Computing Facility
Argonne National Laboratory
More information about the llvm-commits mailing list