[PATCH] D60633: [AMDGPU] Avoid DAG combining assert with fneg(fadd(A,0))

Fri Apr 12 13:16:24 PDT 2019

tpr created this revision.
Herald added subscribers: llvm-commits, t-tye, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl, arsenm.
Herald added a project: LLVM.

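The fneg combine attempts to turn fneg(fadd(A, 0)) into
fadd(fneg(A), fneg(0)), but creating the new fadd folds it to just fneg(A).
When the original fadd has multiple uses, the combine then tries to replace
those uses based on the folded node, which confuses it and you get an assert.
Fixed by bailing out of the combine when the newly created node has been
folded away to an fneg.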

Change-Id: I0ddc9b7286abe78edc0cd8d734fdeb05ff09821c


Repository:
  rL LLVM

https://reviews.llvm.org/D60633

Files:
  lib/Target/AMDGPU/AMDGPUISelLowering.cpp
  test/CodeGen/AMDGPU/assert-combine-fneg-fadd-0.ll


Index: test/CodeGen/AMDGPU/assert-combine-fneg-fadd-0.ll
===================================================================

--- /dev/null
+++ test/CodeGen/AMDGPU/assert-combine-fneg-fadd-0.ll
@@ -0,0 +1,45 @@
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -enable-no-signed-zeros-fp-math -stop-after=amdgpu-isel -o - <%s | FileCheck -check-prefix=GCN %s
+
+; Check that this does not assert when trying to combine an fneg of a
+; multi-use fadd of constant 0.
+
+; GCN-LABEL: body:
+
+define amdgpu_ps void @_amdgpu_ps_main(<4 x i32> %arg) local_unnamed_addr {
+.entry:
+  %tmp = tail call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %arg, i32 224, i32 0)
+  %tmp1 = bitcast i32 %tmp to float
+  %tmp2 = tail call float @llvm.maxnum.f32(float %tmp1, float 0.000000e+00)
+  %tmp3 = tail call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %arg, i32 144, i32 0)
+  %tmp4 = bitcast i32 %tmp3 to float
+  %tmp5 = fdiv float 1.000000e+00, %tmp4
+  %tmp6 = fmul float %tmp5, 0.000000e+00
+  %.i067 = or i1 false, undef
+  %.i070 = select i1 %.i067, float %tmp2, float undef
+  %.i081 = or i1 false, undef
+  %.i084 = select i1 %.i081, float %.i070, float undef
+  %tmp7 = fdiv float 1.000000e+00, %tmp6
+  %tmp8 = fmul float 0.000000e+00, %tmp7
+  %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
+  %.i188 = fadd float %tmp9, 0.000000e+00
+  %tmp10 = fcmp uge float %.i188, %.i084
+  %tmp11 = fsub float -0.000000e+00, %.i188
+  %.i089 = or i1 false, %tmp10
+  %.i092 = select i1 %.i089, float %.i084, float %tmp11
+  %tmp12 = fcmp ule float %.i092, 0.000000e+00
+  %tmp13 = fmul reassoc nnan arcp contract float undef, -5.000000e-01
+  %.i095 = or i1 false, %tmp12
+  %.i198 = select i1 %.i095, float 0.000000e+00, float %tmp13
+  %tmp14 = tail call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float undef, float %.i198, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
+  %tmp15 = extractelement <4 x float> %tmp14, i32 1
+  %tmp16 = tail call float @llvm.log2.f32(float %tmp15)
+  %.i1246 = fmul float %tmp16, 0x40019999A0000000
+  tail call void @llvm.amdgcn.exp.compr.v2f16(i32 immarg 0, i32 immarg 15, <2 x half> undef, <2 x half> undef, i1 immarg true, i1 immarg true)
+  ret void
+}
+
+declare float @llvm.maxnum.f32(float, float)
+declare float @llvm.log2.f32(float)
+declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg)
+declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
+declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg)
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3712,6 +3712,8 @@
       RHS = RHS.getOperand(0);
 
     SDValue Res = DAG.getNode(ISD::FADD, SL, VT, LHS, RHS, N0->getFlags());
+    if (Res.getOpcode() == ISD::FNEG)
+      return SDValue(); // Op got folded away.
     if (!N0.hasOneUse())
       DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
     return Res;
@@ -3731,6 +3733,8 @@
       RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
 
     SDValue Res = DAG.getNode(Opc, SL, VT, LHS, RHS, N0->getFlags());
+    if (Res.getOpcode() == ISD::FNEG)
+      return SDValue(); // Op got folded away.
     if (!N0.hasOneUse())
       DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
     return Res;
@@ -3758,6 +3762,8 @@
       RHS = RHS.getOperand(0);
 
     SDValue Res = DAG.getNode(Opc, SL, VT, LHS, MHS, RHS);
+    if (Res.getOpcode() == ISD::FNEG)
+      return SDValue(); // Op got folded away.
     if (!N0.hasOneUse())
       DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
     return Res;
@@ -3786,6 +3792,8 @@
     unsigned Opposite = inverseMinMax(Opc);
 
     SDValue Res = DAG.getNode(Opposite, SL, VT, NegLHS, NegRHS, N0->getFlags());
+    if (Res.getOpcode() == ISD::FNEG)
+      return SDValue(); // Op got folded away.
     if (!N0.hasOneUse())
       DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
     return Res;
@@ -3796,6 +3804,8 @@
       Ops[I] = DAG.getNode(ISD::FNEG, SL, VT, N0->getOperand(I), N0->getFlags());
 
     SDValue Res = DAG.getNode(AMDGPUISD::FMED3, SL, VT, Ops, N0->getFlags());
+    if (Res.getOpcode() == ISD::FNEG)
+      return SDValue(); // Op got folded away.
     if (!N0.hasOneUse())
       DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
     return Res;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D60633.194945.patch
Type: text/x-patch
Size: 4619 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190412/66e799b1/attachment.bin>