[llvm] AMDGPU: Handle v_mfma_f64_16x16x4_f64 srcc write VGPR hazard change for gfx950 (PR #117283)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 22 20:24:12 PST 2024


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/117283

>From c83ff22c7a0dde1b30d264d8d454c6aaa3c22532 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 7 Mar 2024 14:54:41 +0530
Subject: [PATCH] AMDGPU: Handle v_mfma_f64_16x16x4_f64 srcc write VGPR hazard
 change for gfx950

Read by sgemm/dgemm in srcc after v_mfma_f64_16x16x4_f64 increases from 9 to 17
wait states.
---
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp |  6 ++-
 .../CodeGen/AMDGPU/mai-hazards-gfx940.mir     | 45 ++++++++++++++-----
 2 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index be0936ce74835f..4a4c9788b3d881 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -2302,6 +2302,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
     const int SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates = 9;
     const int SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates = 17;
     const int DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 9;
+    const int GFX950_DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 17;
     const int DMFMA4x4WritesVGPROverlappedSrcCWaitStates = 4;
     const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5;
     const int SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11;
@@ -2359,7 +2360,10 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
         case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64:
         case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64:
           if (!isXDL(ST, *MI))
-            NeedWaitStates = DMFMA16x16WritesVGPROverlappedSrcCWaitStates;
+            NeedWaitStates =
+                ST.hasGFX950Insts()
+                    ? GFX950_DMFMA16x16WritesVGPROverlappedSrcCWaitStates
+                    : DMFMA16x16WritesVGPROverlappedSrcCWaitStates;
           break;
         case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
         case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64:
diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
index b9135dbd46fc1f..1499fd4907a181 100644
--- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
+++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
@@ -298,8 +298,12 @@ body:             |
 ...
 # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_read_overlap
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 0
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 0
 # GCN-NEXT: V_MFMA
 name:            dgemm16x16_mfma_write_vgpr_mfma_read_overlap
 body:             |
@@ -319,8 +323,12 @@ body:             |
 ...
 # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 0
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 0
 # GCN-NEXT: V_MFMA
 name:            dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap
 body:             |
@@ -549,8 +557,12 @@ body:             |
 ...
 # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 2
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 0
 # GCN-NEXT: V_MFMA
 name:            dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
 body:             |
@@ -1333,8 +1345,12 @@ body:             |
 ...
 # GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_read_overlap
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 0
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 0
 # GCN-NEXT: V_MFMA
 name:            dgemm16x16_mfma_write_agpr_mfma_read_overlap
 body:             |
@@ -1354,8 +1370,13 @@ body:             |
 ...
 # GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 0
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 0
+
 # GCN-NEXT: V_MFMA
 name:            dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap
 body:             |
@@ -2502,8 +2523,8 @@ body:             |
 ...
 # GCN-LABEL: name: xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc
 # GCN:      V_MFMA
-# GFX940: S_NOP 4
-# GFX950: S_NOP 5
+# GFX940-NEXT: S_NOP 4
+# GFX950-NEXT: S_NOP 5
 # GCN-NEXT: V_SMFMAC_
 name:            xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc
 body:             |



More information about the llvm-commits mailing list