[llvm] 6e3d2a4 - [ISel] Fix another crash in new FMA DAG combine (#67818)

via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 29 09:18:27 PDT 2023


Author: Jay Foad
Date: 2023-09-29T17:18:23+01:00
New Revision: 6e3d2a4b388288f32ed0441e5c9d1a1e3f886ad7

URL: https://github.com/llvm/llvm-project/commit/6e3d2a4b388288f32ed0441e5c9d1a1e3f886ad7
DIFF: https://github.com/llvm/llvm-project/commit/6e3d2a4b388288f32ed0441e5c9d1a1e3f886ad7.diff

LOG: [ISel] Fix another crash in new FMA DAG combine (#67818)

Following on from D135150, this patch fixes another crash caused by this
DAG combine:

fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)

The combine calls ReplaceAllUsesOfValueWith to replace (fmul C, D) with
(fma C, D, E). This can cause nodes to get CSEd. In D135150 the problem
was that the (fma C, D, E) node got CSEd away. In this new case, the
problem is that the outer fadd node gets CSEd away. To fix it we have
to return SDValue(N, 0) from the combine and be careful not to add a
deleted node to the worklist.

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 67d9c5f220e470e..542ed37f904ba89 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15496,7 +15496,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
         DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
         // Replacing the inner FMul could cause the outer FMA to be simplified
         // away.
-        return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue() : FMA;
+        return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
       }
 
       TmpFMA = TmpFMA->getOperand(2);
@@ -16054,7 +16054,8 @@ SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
 
   // FADD -> FMA combines:
   if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
-    AddToWorklist(Fused.getNode());
+    if (Fused.getOpcode() != ISD::DELETED_NODE)
+      AddToWorklist(Fused.getNode());
     return Fused;
   }
   return SDValue();
@@ -16246,7 +16247,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
 
   // FADD -> FMA combines:
   if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
-    AddToWorklist(Fused.getNode());
+    if (Fused.getOpcode() != ISD::DELETED_NODE)
+      AddToWorklist(Fused.getNode());
     return Fused;
   }
   return SDValue();

diff  --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll
index 36b46140e41e585..23f1fa5774639e7 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll
@@ -78,3 +78,40 @@ bb17:
   %i19 = phi float [ %i12, %bb11 ], [ 0.000000e+00, %bb15 ]
   ret void
 }
+
+define float @test2(float %arg, float %arg1) {
+  ; CHECK-LABEL: name: test2
+  ; CHECK: bb.0.bb:
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1082130432, implicit $exec
+  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 1120534528
+  ; CHECK-NEXT:   [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = nsz contract reassoc nofpexcept V_FMAC_F32_e64 0, [[COPY]], 0, killed [[S_MOV_B32_]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+  ; CHECK-NEXT:   [[V_FMAC_F32_e64_1:%[0-9]+]]:vgpr_32 = nsz contract reassoc nofpexcept V_FMAC_F32_e64 0, [[COPY1]], 0, killed [[S_MOV_B32_1]], 0, [[V_FMAC_F32_e64_]], 0, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nsz contract reassoc nofpexcept V_ADD_F32_e64 0, [[V_FMAC_F32_e64_1]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   [[V_RCP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e64 0, [[V_FMAC_F32_e64_1]], 0, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   [[V_RCP_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e64 0, killed [[V_ADD_F32_e64_]], 0, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[V_RCP_F32_e64_1]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, killed [[V_MUL_F32_e64_]], 0, [[V_RCP_F32_e64_1]], 0, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_MAX_F32_e64_]], 0, killed [[V_RCP_F32_e64_]], 0, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   $vgpr0 = COPY [[V_ADD_F32_e64_1]]
+  ; CHECK-NEXT:   SI_RETURN implicit $vgpr0
+bb:
+  %i = fmul contract float %arg1, 1.000000e+02
+  %i2 = fmul contract float %arg, 0.000000e+00
+  %i3 = fadd reassoc nsz contract float %i, %arg1
+  %i4 = fadd nsz contract float %i3, %i2
+  %i5 = fsub reassoc nsz contract float 1.000000e+00, %i4
+  %i6 = fdiv afn float 1.000000e+00, %i5
+  %i7 = fneg float %arg
+  %i9 = fmul float %i6, %i7
+  %i10 = fmul float %i6, -1.000000e+00
+  %i13 = call float @llvm.maxnum.f32(float %i9, float %i10)
+  %ret = fadd float %i10, %i13
+  ret float %ret
+}
+
+declare float @llvm.maxnum.f32(float, float)


        


More information about the llvm-commits mailing list