[llvm-commits] [llvm] r163051 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/ARM/fp-fast.ll

Mon Oct 15 23:09:31 PDT 2012

On 15 October 2012 16:38, Anitha Boyapati <anitha.boyapati at gmail.com> wrote:
>> +  // (fma x, -1, y) -> (fadd (fneg x), y)
>
> This particular change is target dependent and can include info from
> the target as to whether this is profitable or not.
>
> Say, if FMA has a lower cycle count than (fadd + fneg), then this
> transformation is not
> desired, which is the case in bdver1 and bdver2 as illustrated below:

I did some more analysis.  (fadd+fneg) seems to get folded to fsub,
which looks good. I take back the previous comment in cases where fsub
folding happens :-)

>
> FMA - 6 cycles
> FADD - 6 cycles
> XOR  memory form - 1 or 5 cycles depending on operands (fneg is
> implemented as "xor -0, reg" )
>
> Before Transformation :
>
> FMA x, -1, y (6 cycles)
>
>
> After Transformation :
>
> XOR  (negative_0_constant_location)  x    (5 cycles)
> FADD -x,  y                  (6 cycles)
>
> It is now 6 vs 11 cycles in the worst case.
>
>
> - Anitha
>
> On 1 September 2012 11:34, Owen Anderson <resistor at mac.com> wrote:
>> Author: resistor
>> Date: Sat Sep  1 01:04:27 2012
>> New Revision: 163051
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=163051&view=rev
>> Log:
>> Teach DAG combine a number of tricks to simplify FMA expressions in fast-math mode.
>>
>> Added:
>>     llvm/trunk/test/CodeGen/ARM/fp-fast.ll
>> Modified:
>>     llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>>
>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=163051&r1=163050&r2=163051&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sat Sep  1 01:04:27 2012
>> @@ -5988,6 +5988,7 @@
>>    ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
>>    ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
>>    EVT VT = N->getValueType(0);
>> +  DebugLoc dl = N->getDebugLoc();
>>
>>    if (N0CFP && N0CFP->isExactlyValue(1.0))
>>      return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2);
>> @@ -5998,6 +5999,58 @@
>>    if (N0CFP && !N1CFP)
>>      return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2);
>>
>> +  // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
>> +  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
>> +      N2.getOpcode() == ISD::FMUL &&
>> +      N0 == N2.getOperand(0) &&
>> +      N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
>> +    return DAG.getNode(ISD::FMUL, dl, VT, N0,
>> +                       DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
>> +  }
>> +
>> +
>> +  // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
>> +  if (DAG.getTarget().Options.UnsafeFPMath &&
>> +      N0.getOpcode() == ISD::FMUL && N1CFP &&
>> +      N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
>> +    return DAG.getNode(ISD::FMA, dl, VT,
>> +                       N0.getOperand(0),
>> +                       DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
>> +                       N2);
>> +  }
>> +
>> +  // (fma x, 1, y) -> (fadd x, y)
>> +  // (fma x, -1, y) -> (fadd (fneg x), y)
>> +  if (N1CFP) {
>> +    if (N1CFP->isExactlyValue(1.0))
>> +      return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
>> +
>> +    if (N1CFP->isExactlyValue(-1.0) &&
>> +        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
>> +      SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
>> +      AddToWorkList(RHSNeg.getNode());
>> +      return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
>> +    }
>> +  }
>> +
>> +  // (fma x, c, x) -> (fmul x, (c+1))
>> +  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) {
>> +    return DAG.getNode(ISD::FMUL, dl, VT,
>> +                       N0,
>> +                       DAG.getNode(ISD::FADD, dl, VT,
>> +                                   N1, DAG.getConstantFP(1.0, VT)));
>> +  }
>> +
>> +  // (fma x, c, (fneg x)) -> (fmul x, (c-1))
>> +  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
>> +      N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
>> +    return DAG.getNode(ISD::FMUL, dl, VT,
>> +                       N0,
>> +                       DAG.getNode(ISD::FADD, dl, VT,
>> +                                   N1, DAG.getConstantFP(-1.0, VT)));
>> +  }
>> +
>> +
>>    return SDValue();
>>  }
>>
>> @@ -6367,6 +6420,17 @@
>>      }
>>    }
>>
>> +  // (fneg (fmul c, x)) -> (fmul -c, x)
>> +  if (N0.getOpcode() == ISD::FMUL) {
>> +    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
>> +    if (CFP1) {
>> +      return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
>> +                         N0.getOperand(0),
>> +                         DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
>> +                                     N0.getOperand(1)));
>> +    }
>> +  }
>> +
>>    return SDValue();
>>  }
>>
>>
>> Added: llvm/trunk/test/CodeGen/ARM/fp-fast.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fp-fast.ll?rev=163051&view=auto
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/ARM/fp-fast.ll (added)
>> +++ llvm/trunk/test/CodeGen/ARM/fp-fast.ll Sat Sep  1 01:04:27 2012
>> @@ -0,0 +1,60 @@
>> +; RUN: llc -march=arm -mcpu=cortex-a9 -mattr=+vfp4 -enable-unsafe-fp-math < %s | FileCheck %s
>> +
>> +; CHECK: test1
>> +define float @test1(float %x) {
>> +; CHECK-NOT: vfma
>> +; CHECK: vmul.f32
>> +; CHECK-NOT: vfma
>> +  %t1 = fmul float %x, 3.0
>> +  %t2 = call float @llvm.fma.f32(float %x, float 2.0, float %t1)
>> +  ret float %t2
>> +}
>> +
>> +; CHECK: test2
>> +define float @test2(float %x, float %y) {
>> +; CHECK-NOT: vmul
>> +; CHECK: vfma.f32
>> +; CHECK-NOT: vmul
>> +  %t1 = fmul float %x, 3.0
>> +  %t2 = call float @llvm.fma.f32(float %t1, float 2.0, float %y)
>> +  ret float %t2
>> +}
>> +
>> +; CHECK: test3
>> +define float @test3(float %x, float %y) {
>> +; CHECK-NOT: vfma
>> +; CHECK: vadd.f32
>> +; CHECK-NOT: vfma
>> +  %t2 = call float @llvm.fma.f32(float %x, float 1.0, float %y)
>> +  ret float %t2
>> +}
>> +
>> +; CHECK: test4
>> +define float @test4(float %x, float %y) {
>> +; CHECK-NOT: vfma
>> +; CHECK: vsub.f32
>> +; CHECK-NOT: vfma
>> +  %t2 = call float @llvm.fma.f32(float %x, float -1.0, float %y)
>> +  ret float %t2
>> +}
>> +
>> +; CHECK: test5
>> +define float @test5(float %x) {
>> +; CHECK-NOT: vfma
>> +; CHECK: vmul.f32
>> +; CHECK-NOT: vfma
>> +  %t2 = call float @llvm.fma.f32(float %x, float 2.0, float %x)
>> +  ret float %t2
>> +}
>> +
>> +; CHECK: test6
>> +define float @test6(float %x) {
>> +; CHECK-NOT: vfma
>> +; CHECK: vmul.f32
>> +; CHECK-NOT: vfma
>> +  %t1 = fsub float -0.0, %x
>> +  %t2 = call float @llvm.fma.f32(float %x, float 5.0, float %t1)
>> +  ret float %t2
>> +}
>> +
>> +declare float @llvm.fma.f32(float, float, float)
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
>
>
> --
>  Anitha

-- 
 Anitha