[llvm] f8ef7c9 - [DAGCombiner] Require ninf for division estimation

Sun Jun 14 08:01:31 PDT 2020

Author: Qiu Chaofan
Date: 2020-06-14T22:58:22+08:00
New Revision: f8ef7c99a0199084609ce0e938ea1ff2c3636f09

URL: https://github.com/llvm/llvm-project/commit/f8ef7c99a0199084609ce0e938ea1ff2c3636f09
DIFF: https://github.com/llvm/llvm-project/commit/f8ef7c99a0199084609ce0e938ea1ff2c3636f09.diff

LOG: [DAGCombiner] Require ninf for division estimation

The current implementation of division estimation is incorrect for some
cases, such as 1.0/0.0 (the result is NaN instead of the expected inf).

This change also exposes a potential infinite loop: combineRepeatedFPDivisors
uses isConstOrConstSplatFP to check whether the divisor is a constant, but
that check does not work after legalization on some platforms. This patch
therefore makes combineRepeatedFPDivisors bail out once LegalDAG is set.

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D80542

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/AMDGPU/fdiv.ll
    llvm/test/CodeGen/PowerPC/combine-fneg.ll
    llvm/test/CodeGen/PowerPC/fdiv.ll
    llvm/test/CodeGen/PowerPC/qpx-recipest.ll
    llvm/test/CodeGen/PowerPC/recipest.ll
    llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
    llvm/test/CodeGen/PowerPC/vsx-recip-est.ll
    llvm/test/CodeGen/X86/fdiv-combine-vec.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 38fa7319ac18..868f489b5a23 100644

--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13032,7 +13032,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
   //       that only minsize should restrict this.
   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
   const SDNodeFlags Flags = N->getFlags();
-  if (!UnsafeMath && !Flags.hasAllowReciprocal())
+  if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
     return SDValue();
 
   // Skip if current node is a reciprocal/fneg-reciprocal.
@@ -13186,8 +13186,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
     }
 
     // Fold into a reciprocal estimate and multiply instead of a real divide.
-    if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
-      return RV;
+    if (Options.NoInfsFPMath || Flags.hasNoInfs())
+      if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
+        return RV;
   }
 
   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)

diff  --git a/llvm/test/CodeGen/AMDGPU/fdiv.ll b/llvm/test/CodeGen/AMDGPU/fdiv.ll
index 1986ecf20929..e6df20625beb 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv.ll
@@ -32,7 +32,7 @@
 ; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
 define amdgpu_kernel void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) #0 {
 entry:
-  %fdiv = fdiv float %a, %b
+  %fdiv = fdiv ninf float %a, %b
   store float %fdiv, float addrspace(1)* %out
   ret void
 }
@@ -152,7 +152,7 @@ entry:
 ; GCN: buffer_store_dword [[RESULT]]
 define amdgpu_kernel void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) #0 {
 entry:
-  %fdiv = fdiv arcp float %a, %b
+  %fdiv = fdiv arcp ninf float %a, %b
   store float %fdiv, float addrspace(1)* %out
   ret void
 }
@@ -210,7 +210,7 @@ entry:
 ; GCN: v_rcp_f32
 define amdgpu_kernel void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
 entry:
-  %fdiv = fdiv arcp <2 x float> %a, %b
+  %fdiv = fdiv arcp ninf <2 x float> %a, %b
   store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
   ret void
 }
@@ -279,7 +279,7 @@ define amdgpu_kernel void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out,
   %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
   %a = load <4 x float>, <4 x float> addrspace(1) * %in
   %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
-  %result = fdiv arcp <4 x float> %a, %b
+  %result = fdiv arcp ninf <4 x float> %a, %b
   store <4 x float> %result, <4 x float> addrspace(1)* %out
   ret void
 }

diff  --git a/llvm/test/CodeGen/PowerPC/combine-fneg.ll b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
index 65e884483a3c..6f81614881ce 100644
--- a/llvm/test/CodeGen/PowerPC/combine-fneg.ll
+++ b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
@@ -23,7 +23,7 @@ define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) {
 entry:
   %splat.splatinsert = insertelement <4 x double> undef, double %a0, i32 0
   %splat.splat = shufflevector <4 x double> %splat.splatinsert, <4 x double> undef, <4 x i32> zeroinitializer
-  %div = fdiv reassoc nsz arcp <4 x double> %a1, %splat.splat
+  %div = fdiv reassoc nsz arcp ninf <4 x double> %a1, %splat.splat
   %sub = fsub reassoc nsz <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, %div
   ret <4 x double> %sub
 }

diff  --git a/llvm/test/CodeGen/PowerPC/fdiv.ll b/llvm/test/CodeGen/PowerPC/fdiv.ll
index b2004fe471b8..67d29af7d32f 100644
--- a/llvm/test/CodeGen/PowerPC/fdiv.ll
+++ b/llvm/test/CodeGen/PowerPC/fdiv.ll
@@ -1,6 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
 
+define dso_local float @foo_nosw(float %0, float %1) local_unnamed_addr {
+; CHECK-LABEL: foo_nosw:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xsdivsp 1, 1, 2
+; CHECK-NEXT:    blr
+  %3 = fdiv reassoc arcp nsz float %0, %1
+  ret float %3
+}
+
 define dso_local float @foo(float %0, float %1) local_unnamed_addr {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0:
@@ -10,6 +19,6 @@ define dso_local float @foo(float %0, float %1) local_unnamed_addr {
 ; CHECK-NEXT:    xsmaddasp 0, 3, 1
 ; CHECK-NEXT:    fmr 1, 0
 ; CHECK-NEXT:    blr
-  %3 = fdiv reassoc arcp nsz float %0, %1
+  %3 = fdiv reassoc arcp nsz ninf float %0, %1
   ret float %3
 }

diff  --git a/llvm/test/CodeGen/PowerPC/qpx-recipest.ll b/llvm/test/CodeGen/PowerPC/qpx-recipest.ll
index 7e639e03ad04..498ab62819ce 100644
--- a/llvm/test/CodeGen/PowerPC/qpx-recipest.ll
+++ b/llvm/test/CodeGen/PowerPC/qpx-recipest.ll
@@ -236,7 +236,7 @@ define <4 x double> @foo2_fmf(<4 x double> %a, <4 x double> %b) nounwind {
 ; CHECK-NEXT:    qvfmadd 1, 0, 1, 3
 ; CHECK-NEXT:    blr
 entry:
-  %r = fdiv arcp reassoc nsz <4 x double> %a, %b
+  %r = fdiv arcp reassoc nsz ninf <4 x double> %a, %b
   ret <4 x double> %r
 }
 
@@ -272,7 +272,7 @@ define <4 x float> @goo2_fmf(<4 x float> %a, <4 x float> %b) nounwind {
 ; CHECK-NEXT:    qvfmadds 1, 0, 1, 3
 ; CHECK-NEXT:    blr
 entry:
-  %r = fdiv arcp reassoc <4 x float> %a, %b
+  %r = fdiv arcp reassoc ninf <4 x float> %a, %b
   ret <4 x float> %r
 }
 

diff  --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll
index ebece0081a62..042bfc99bb58 100644
--- a/llvm/test/CodeGen/PowerPC/recipest.ll
+++ b/llvm/test/CodeGen/PowerPC/recipest.ll
@@ -431,7 +431,7 @@ define float @rsqrt_fmul_fmf(float %a, float %b, float %c) {
 ; CHECK-P9-NEXT:    blr
   %x = call reassoc arcp nsz float @llvm.sqrt.f32(float %a)
   %y = fmul reassoc nsz float %x, %b
-  %z = fdiv reassoc arcp nsz float %c, %y
+  %z = fdiv reassoc arcp nsz ninf float %c, %y
   ret float %z
 }
 
@@ -602,7 +602,7 @@ define double @foo2_fmf(double %a, double %b) nounwind {
 ; CHECK-P9-NEXT:    xsmaddadp 0, 3, 1
 ; CHECK-P9-NEXT:    fmr 1, 0
 ; CHECK-P9-NEXT:    blr
-  %r = fdiv reassoc arcp nsz double %a, %b
+  %r = fdiv reassoc arcp nsz ninf double %a, %b
   ret double %r
 }
 
@@ -651,7 +651,7 @@ define float @goo2_fmf(float %a, float %b) nounwind {
 ; CHECK-P9-NEXT:    xsmaddasp 0, 3, 1
 ; CHECK-P9-NEXT:    fmr 1, 0
 ; CHECK-P9-NEXT:    blr
-  %r = fdiv reassoc arcp nsz float %a, %b
+  %r = fdiv reassoc arcp nsz ninf float %a, %b
   ret float %r
 }
 
@@ -705,7 +705,7 @@ define <4 x float> @hoo2_fmf(<4 x float> %a, <4 x float> %b) nounwind {
 ; CHECK-P9-NEXT:    xvmaddasp 0, 1, 34
 ; CHECK-P9-NEXT:    xxlor 34, 0, 0
 ; CHECK-P9-NEXT:    blr
-  %r = fdiv reassoc arcp nsz <4 x float> %a, %b
+  %r = fdiv reassoc arcp nsz ninf <4 x float> %a, %b
   ret <4 x float> %r
 }
 

diff  --git a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
index 0d9a67449bb7..bb3ca9d84c6a 100644
--- a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
+++ b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
@@ -1,15 +1,38 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-- < %s | FileCheck %s
 
-define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) {
-; CHECK-LABEL: repeated_fp_divisor:
+; Check if this causes infinite loop when estimation disabled
+define <4 x float> @repeated_fp_divisor_noest(float %a, <4 x float> %b) {
+; CHECK-LABEL: repeated_fp_divisor_noest:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xscvdpspn 0, 1
+; CHECK-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
+; CHECK-NEXT:    addi 3, 3, .LCPI0_1@toc@l
+; CHECK-NEXT:    lvx 3, 0, 3
 ; CHECK-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .LCPI0_0@toc@l
+; CHECK-NEXT:    xxspltw 0, 0, 0
+; CHECK-NEXT:    xvdivsp 0, 35, 0
 ; CHECK-NEXT:    lvx 3, 0, 3
-; CHECK-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
-; CHECK-NEXT:    addi 3, 3, .LCPI0_1@toc@l
+; CHECK-NEXT:    xvmulsp 1, 34, 35
+; CHECK-NEXT:    xvmulsp 34, 1, 0
+; CHECK-NEXT:    blr
+  %ins = insertelement <4 x float> undef, float %a, i32 0
+  %splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
+  %t1 = fmul reassoc <4 x float> %b, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0x3FF028F5C0000000>
+  %mul = fdiv reassoc arcp nsz <4 x float> %t1, %splat
+  ret <4 x float> %mul
+}
+
+define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) {
+; CHECK-LABEL: repeated_fp_divisor:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xscvdpspn 0, 1
+; CHECK-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
+; CHECK-NEXT:    addi 3, 3, .LCPI1_0@toc@l
+; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    addis 3, 2, .LCPI1_1@toc@ha
+; CHECK-NEXT:    addi 3, 3, .LCPI1_1@toc@l
 ; CHECK-NEXT:    lvx 4, 0, 3
 ; CHECK-NEXT:    xxspltw 0, 0, 0
 ; CHECK-NEXT:    xvresp 1, 0
@@ -21,7 +44,7 @@ define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) {
   %ins = insertelement <4 x float> undef, float %a, i32 0
   %splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
   %t1 = fmul reassoc <4 x float> %b, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0x3FF028F5C0000000>
-  %mul = fdiv reassoc arcp nsz <4 x float> %t1, %splat
+  %mul = fdiv reassoc arcp nsz ninf <4 x float> %t1, %splat
   ret <4 x float> %mul
 }
 

diff  --git a/llvm/test/CodeGen/PowerPC/vsx-recip-est.ll b/llvm/test/CodeGen/PowerPC/vsx-recip-est.ll
index f6a6a3172235..969f480069d1 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-recip-est.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-recip-est.ll
@@ -10,7 +10,7 @@ define float @emit_xsresp() {
 entry:
   %0 = load float, float* @a, align 4
   %1 = load float, float* @b, align 4
-  %div = fdiv arcp float %0, %1
+  %div = fdiv arcp ninf float %0, %1
   ret float %div
 ; CHECK-LABEL: @emit_xsresp
 ; CHECK: xsresp {{[0-9]+}}
@@ -38,7 +38,7 @@ define double @emit_xsredp() {
 entry:
   %0 = load double, double* @c, align 8
   %1 = load double, double* @d, align 8
-  %div = fdiv arcp double %0, %1
+  %div = fdiv arcp ninf double %0, %1
   ret double %div
 ; CHECK-LABEL: @emit_xsredp
 ; CHECK: xsredp {{[0-9]+}}

diff  --git a/llvm/test/CodeGen/X86/fdiv-combine-vec.ll b/llvm/test/CodeGen/X86/fdiv-combine-vec.ll
index 825f8a50f96d..4901007f60f9 100644
--- a/llvm/test/CodeGen/X86/fdiv-combine-vec.ll
+++ b/llvm/test/CodeGen/X86/fdiv-combine-vec.ll
@@ -120,7 +120,7 @@ define <4 x float> @splat_fdiv_v4f32_estimate(<4 x float> %x, float %y) #0 {
 ; AVX-NEXT:    retq
   %vy = insertelement <4 x float> undef, float %y, i32 0
   %splaty = shufflevector <4 x float> %vy, <4 x float> undef, <4 x i32> zeroinitializer
-  %r = fdiv arcp reassoc <4 x float> %x, %splaty
+  %r = fdiv arcp reassoc ninf <4 x float> %x, %splaty
   ret <4 x float> %r
 }