[llvm-commits] [llvm] r154265 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/ARM/vdiv_combine.ll test/CodeGen/X86/fdiv.ll

Sat Apr 7 14:44:27 PDT 2012

On 07.04.2012, at 22:04, Duncan Sands wrote:

> Author: baldrick
> Date: Sat Apr  7 15:04:00 2012
> New Revision: 154265
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=154265&view=rev
> Log:
> Convert floating point division by a constant into multiplication by the
> reciprocal if converting to the reciprocal is exact.  Do it even if inexact
> if -ffast-math.  This substantially speeds up ac.f90 from the polyhedron
> benchmarks.
> 
> Added:
>    llvm/trunk/test/CodeGen/X86/fdiv.ll
> Modified:
>    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>    llvm/trunk/test/CodeGen/ARM/vdiv_combine.ll
> 
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=154265&r1=154264&r2=154265&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sat Apr  7 15:04:00 2012
> @@ -5725,6 +5725,19 @@
>   if (N0CFP && N1CFP && VT != MVT::ppcf128)
>     return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
> 
> +  // fold (fdiv X, c2) -> fmul X, 1/c2 if there is no precision loss or if
> +  // losing precision is acceptable.
> +  if (N1CFP && VT != MVT::ppcf128) {
> +    // Compute the reciprocal 1.0 / c2.
> +    APFloat N1APF = N1CFP->getValueAPF();
> +    APFloat Recip(N1APF.getSemantics(), 1); // 1.0
> +    APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
> +    // Only do the transform if the reciprocal is not too horrible (eg not NaN).
> +    if (st == APFloat::opOK || (st == APFloat::opInexact &&
> +                                DAG.getTarget().Options.UnsafeFPMath))
> +      return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0,
> +                         DAG.getConstantFP(Recip, VT));
> +  }

One thing to keep in mind is that we don't want to turn fdivs into fmuls by a denormal number. Denormals may be disabled on the CPU (ops always return 0.0); in other cases they are just painfully slow.

I have a bad feeling about allowing any reciprocal in safe math mode if the division was exact; in general, this is only safe for powers of two (see APFloat::getExactInverse).

- Ben

>   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
>   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
> 
> Modified: llvm/trunk/test/CodeGen/ARM/vdiv_combine.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vdiv_combine.ll?rev=154265&r1=154264&r2=154265&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/ARM/vdiv_combine.ll (original)
> +++ llvm/trunk/test/CodeGen/ARM/vdiv_combine.ll Sat Apr  7 15:04:00 2012
> @@ -8,7 +8,7 @@
> 
> ; Test signed conversion.
> ; CHECK: t1
> -; CHECK-NOT: vdiv
> +; CHECK-NOT: {{vdiv|vmul}}
> define void @t1() nounwind {
> entry:
>   %tmp = load i32* @iin, align 4, !tbaa !3
> @@ -24,7 +24,7 @@
> 
> ; Test unsigned conversion.
> ; CHECK: t2
> -; CHECK-NOT: vdiv
> +; CHECK-NOT: {{vdiv|vmul}}
> define void @t2() nounwind {
> entry:
>   %tmp = load i32* @uin, align 4, !tbaa !3
> @@ -38,7 +38,7 @@
> 
> ; Test which should not fold due to non-power of 2.
> ; CHECK: t3
> -; CHECK: vdiv
> +; CHECK: {{vdiv|vmul}}
> define void @t3() nounwind {
> entry:
>   %tmp = load i32* @iin, align 4, !tbaa !3
> @@ -52,7 +52,7 @@
> 
> ; Test which should not fold due to power of 2 out of range.
> ; CHECK: t4
> -; CHECK: vdiv
> +; CHECK: {{vdiv|vmul}}
> define void @t4() nounwind {
> entry:
>   %tmp = load i32* @iin, align 4, !tbaa !3
> @@ -66,7 +66,7 @@
> 
> ; Test case where const is max power of 2 (i.e., 2^32).
> ; CHECK: t5
> -; CHECK-NOT: vdiv
> +; CHECK-NOT: {{vdiv|vmul}}
> define void @t5() nounwind {
> entry:
>   %tmp = load i32* @iin, align 4, !tbaa !3
> @@ -80,7 +80,7 @@
> 
> ; Test quadword.
> ; CHECK: t6
> -; CHECK-NOT: vdiv
> +; CHECK-NOT: {{vdiv|vmul}}
> define void @t6() nounwind {
> entry:
>   %tmp = load i32* @iin, align 4, !tbaa !3
> 
> Added: llvm/trunk/test/CodeGen/X86/fdiv.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fdiv.ll?rev=154265&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/fdiv.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/fdiv.ll Sat Apr  7 15:04:00 2012
> @@ -0,0 +1,32 @@
> +; RUN: llc < %s -march=x86-64 | FileCheck %s
> +; RUN: llc < %s -march=x86-64 -enable-unsafe-fp-math | FileCheck -check-prefix=UNSAFE %s
> +
> +define double @exact(double %x) {
> +; Exact division by a constant always converted to multiplication.
> +; CHECK: @exact
> +; CHECK: mulsd
> +; UNSAFE: @exact
> +; UNSAFE: mulsd
> +  %div = fdiv double %x, 2.0
> +  ret double %div
> +}
> +
> +define double @inexact(double %x) {
> +; Inexact division by a constant converted to multiplication if unsafe-math.
> +; CHECK: @inexact
> +; CHECK: divsd
> +; UNSAFE: @inexact
> +; UNSAFE: mulsd
> +  %div = fdiv double %x, 0x41DFFFFFFFC00000 
> +  ret double %div
> +}
> +
> +define double @funky(double %x) {
> +; No conversion to multiplication if too funky.
> +; CHECK: @funky
> +; CHECK: divsd
> +; UNSAFE: @funky
> +; UNSAFE: divsd
> +  %div = fdiv double %x, 0.0
> +  ret double %div
> +}
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits