[llvm-commits] [llvm] r163051 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/ARM/fp-fast.ll

Mon Oct 15 04:08:48 PDT 2012

> +  // (fma x, -1, y) -> (fadd (fneg x), y)

This particular change is target dependent and could take into account
info from the target as to whether it is profitable or not.

For example, if FMA has a lower cycle count than (fadd + fneg), then this
transformation is not desired, which is the case on bdver1 and bdver2 as
illustrated below:

FMA - 6 cycles
FADD - 6 cycles
XOR (memory form) - 1 or 5 cycles depending on operands (fneg is
implemented as "xor -0, reg")

Before Transformation :

FMA x, -1, y (6 cycles)

After Transformation :

XOR  (negative_0_constant_location)  x    (5 cycles)
FADD -x,  y                  (6 cycles)

It is now 6 vs 11 cycles in the worst case.

- Anitha

On 1 September 2012 11:34, Owen Anderson <resistor at mac.com> wrote:
> Author: resistor
> Date: Sat Sep  1 01:04:27 2012
> New Revision: 163051
>
> URL: http://llvm.org/viewvc/llvm-project?rev=163051&view=rev
> Log:
> Teach DAG combine a number of tricks to simplify FMA expressions in fast-math mode.
>
> Added:
>     llvm/trunk/test/CodeGen/ARM/fp-fast.ll
> Modified:
>     llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=163051&r1=163050&r2=163051&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sat Sep  1 01:04:27 2012
> @@ -5988,6 +5988,7 @@
>    ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
>    ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
>    EVT VT = N->getValueType(0);
> +  DebugLoc dl = N->getDebugLoc();
>
>    if (N0CFP && N0CFP->isExactlyValue(1.0))
>      return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2);
> @@ -5998,6 +5999,58 @@
>    if (N0CFP && !N1CFP)
>      return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2);
>
> +  // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
> +  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
> +      N2.getOpcode() == ISD::FMUL &&
> +      N0 == N2.getOperand(0) &&
> +      N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
> +    return DAG.getNode(ISD::FMUL, dl, VT, N0,
> +                       DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
> +  }
> +
> +
> +  // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
> +  if (DAG.getTarget().Options.UnsafeFPMath &&
> +      N0.getOpcode() == ISD::FMUL && N1CFP &&
> +      N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
> +    return DAG.getNode(ISD::FMA, dl, VT,
> +                       N0.getOperand(0),
> +                       DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
> +                       N2);
> +  }
> +
> +  // (fma x, 1, y) -> (fadd x, y)
> +  // (fma x, -1, y) -> (fadd (fneg x), y)
> +  if (N1CFP) {
> +    if (N1CFP->isExactlyValue(1.0))
> +      return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
> +
> +    if (N1CFP->isExactlyValue(-1.0) &&
> +        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
> +      SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
> +      AddToWorkList(RHSNeg.getNode());
> +      return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
> +    }
> +  }
> +
> +  // (fma x, c, x) -> (fmul x, (c+1))
> +  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) {
> +    return DAG.getNode(ISD::FMUL, dl, VT,
> +                       N0,
> +                       DAG.getNode(ISD::FADD, dl, VT,
> +                                   N1, DAG.getConstantFP(1.0, VT)));
> +  }
> +
> +  // (fma x, c, (fneg x)) -> (fmul x, (c-1))
> +  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
> +      N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
> +    return DAG.getNode(ISD::FMUL, dl, VT,
> +                       N0,
> +                       DAG.getNode(ISD::FADD, dl, VT,
> +                                   N1, DAG.getConstantFP(-1.0, VT)));
> +  }
> +
> +
>    return SDValue();
>  }
>
> @@ -6367,6 +6420,17 @@
>      }
>    }
>
> +  // (fneg (fmul c, x)) -> (fmul -c, x)
> +  if (N0.getOpcode() == ISD::FMUL) {
> +    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
> +    if (CFP1) {
> +      return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
> +                         N0.getOperand(0),
> +                         DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
> +                                     N0.getOperand(1)));
> +    }
> +  }
> +
>    return SDValue();
>  }
>
>
> Added: llvm/trunk/test/CodeGen/ARM/fp-fast.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fp-fast.ll?rev=163051&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/ARM/fp-fast.ll (added)
> +++ llvm/trunk/test/CodeGen/ARM/fp-fast.ll Sat Sep  1 01:04:27 2012
> @@ -0,0 +1,60 @@
> +; RUN: llc -march=arm -mcpu=cortex-a9 -mattr=+vfp4 -enable-unsafe-fp-math < %s | FileCheck %s
> +
> +; CHECK: test1
> +define float @test1(float %x) {
> +; CHECK-NOT: vfma
> +; CHECK: vmul.f32
> +; CHECK-NOT: vfma
> +  %t1 = fmul float %x, 3.0
> +  %t2 = call float @llvm.fma.f32(float %x, float 2.0, float %t1)
> +  ret float %t2
> +}
> +
> +; CHECK: test2
> +define float @test2(float %x, float %y) {
> +; CHECK-NOT: vmul
> +; CHECK: vfma.f32
> +; CHECK-NOT: vmul
> +  %t1 = fmul float %x, 3.0
> +  %t2 = call float @llvm.fma.f32(float %t1, float 2.0, float %y)
> +  ret float %t2
> +}
> +
> +; CHECK: test3
> +define float @test3(float %x, float %y) {
> +; CHECK-NOT: vfma
> +; CHECK: vadd.f32
> +; CHECK-NOT: vfma
> +  %t2 = call float @llvm.fma.f32(float %x, float 1.0, float %y)
> +  ret float %t2
> +}
> +
> +; CHECK: test4
> +define float @test4(float %x, float %y) {
> +; CHECK-NOT: vfma
> +; CHECK: vsub.f32
> +; CHECK-NOT: vfma
> +  %t2 = call float @llvm.fma.f32(float %x, float -1.0, float %y)
> +  ret float %t2
> +}
> +
> +; CHECK: test5
> +define float @test5(float %x) {
> +; CHECK-NOT: vfma
> +; CHECK: vmul.f32
> +; CHECK-NOT: vfma
> +  %t2 = call float @llvm.fma.f32(float %x, float 2.0, float %x)
> +  ret float %t2
> +}
> +
> +; CHECK: test6
> +define float @test6(float %x) {
> +; CHECK-NOT: vfma
> +; CHECK: vmul.f32
> +; CHECK-NOT: vfma
> +  %t1 = fsub float -0.0, %x
> +  %t2 = call float @llvm.fma.f32(float %x, float 5.0, float %t1)
> +  ret float %t2
> +}
> +
> +declare float @llvm.fma.f32(float, float, float)
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits

-- 
 Anitha