[llvm] 3c8e055 - [AMDGPU] Fix mfma scheduling crash

Thu Oct 24 11:02:24 PDT 2019

Author: Stanislav Mekhanoshin
Date: 2019-10-24T11:01:52-07:00
New Revision: 3c8e055187d8adf1834cdc735d82df5529fbbd86

URL: https://github.com/llvm/llvm-project/commit/3c8e055187d8adf1834cdc735d82df5529fbbd86
DIFF: https://github.com/llvm/llvm-project/commit/3c8e055187d8adf1834cdc735d82df5529fbbd86.diff

LOG: [AMDGPU] Fix mfma scheduling crash

An SUnit can be neither intruction not SDNode. It is all
null if represents a nop. Fixed a crash on using SU->getInstr().

Differential Revision: https://reviews.llvm.org/D69395

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
    llvm/test/CodeGen/AMDGPU/mfma-loop.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 3bb6dd4571c0..46e4d76367f0 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -773,6 +773,11 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
     return MI && TII->isSALU(*MI) && !MI->isTerminator();
   }
 
+  bool isVALU(const SUnit *SU) const {
+    const MachineInstr *MI = SU->getInstr();
+    return MI && TII->isVALU(*MI);
+  }
+
   bool canAddEdge(const SUnit *Succ, const SUnit *Pred) const {
     if (Pred->NodeNum < Succ->NodeNum)
       return true;
@@ -821,7 +826,7 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
 
       for (SDep &SI : From->Succs) {
         SUnit *SUv = SI.getSUnit();
-        if (SUv != From && TII->isVALU(*SUv->getInstr()) && canAddEdge(SUv, SU))
+        if (SUv != From && isVALU(SUv) && canAddEdge(SUv, SU))
           SUv->addPred(SDep(SU, SDep::Artificial), false);
       }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
index a67aadfcd27c..90c64763899c 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
@@ -76,5 +76,39 @@ exit:
   ret void
 }
 
+; GCN-LABEL: {{^}}test_mfma_loop_mfma_forward_init:
+
+; GCN-COUNT32: v_accvgpr_write_b32 a{{[0-9]+}}, 0
+; GCN: v_mfma_f32_32x32x1f32
+; GCN-NOT: v_accvgpr
+
+; GCN: [[LOOP:BB[0-9_]+]]:
+; GCN-NOT: v_accvgpr
+; GCN: v_mfma_f32_32x32x1f32
+; GCN-NOT: v_accvgpr
+; GCN: s_cbranch_scc1 [[LOOP]]
+
+; GCN-COUNT32: v_accvgpr_read_b32
+define amdgpu_kernel void @test_mfma_loop_mfma_forward_init(<32 x float> addrspace(1)* %arg) {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %init = bitcast i32 %tid to float
+  %mai.0 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> zeroinitializer, i32 0, i32 0, i32 0)
+
+  br label %for.cond.preheader
+
+for.cond.preheader:
+  %phi = phi <32 x float> [ %mai.0, %entry ], [ %mai.1, %for.cond.preheader ]
+  %c = phi i32 [ 0, %entry ], [ %inc, %for.cond.preheader ]
+  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %phi, i32 0, i32 0, i32 0)
+  %inc = add nuw nsw i32 %c, 1
+  %cc = icmp eq i32 %inc, 16
+  br i1 %cc, label %exit, label %for.cond.preheader
+
+exit:
+  store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
+  ret void
+}
+
 declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
 declare i32 @llvm.amdgcn.workitem.id.x()