[llvm-branch-commits] [llvm] AMDGPU: Handle v_mfma_f64_16x16x4_f64 srcc write VGPR hazard change for gfx950 (PR #117283)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Nov 21 20:47:08 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
Read by sgemm/dgemm in srcc after v_mfma_f64_16x16x4_f64 increases from 9 to 17
wait states.
---
Full diff: https://github.com/llvm/llvm-project/pull/117283.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (+5-1)
- (modified) llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir (+33-12)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index be0936ce74835f..4a4c9788b3d881 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -2302,6 +2302,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
const int SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates = 9;
const int SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates = 17;
const int DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 9;
+ const int GFX950_DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 17;
const int DMFMA4x4WritesVGPROverlappedSrcCWaitStates = 4;
const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5;
const int SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11;
@@ -2359,7 +2360,10 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64:
case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64:
if (!isXDL(ST, *MI))
- NeedWaitStates = DMFMA16x16WritesVGPROverlappedSrcCWaitStates;
+ NeedWaitStates =
+ ST.hasGFX950Insts()
+ ? GFX950_DMFMA16x16WritesVGPROverlappedSrcCWaitStates
+ : DMFMA16x16WritesVGPROverlappedSrcCWaitStates;
break;
case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64:
diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
index b9135dbd46fc1f..1499fd4907a181 100644
--- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
+++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
@@ -298,8 +298,12 @@ body: |
...
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 0
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_vgpr_mfma_read_overlap
body: |
@@ -319,8 +323,12 @@ body: |
...
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 0
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap
body: |
@@ -549,8 +557,12 @@ body: |
...
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 2
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
body: |
@@ -1333,8 +1345,12 @@ body: |
...
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 0
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_agpr_mfma_read_overlap
body: |
@@ -1354,8 +1370,13 @@ body: |
...
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 0
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 0
+
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap
body: |
@@ -2502,8 +2523,8 @@ body: |
...
# GCN-LABEL: name: xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc
# GCN: V_MFMA
-# GFX940: S_NOP 4
-# GFX950: S_NOP 5
+# GFX940-NEXT: S_NOP 4
+# GFX950-NEXT: S_NOP 5
# GCN-NEXT: V_SMFMAC_
name: xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc
body: |
``````````
</details>
https://github.com/llvm/llvm-project/pull/117283
More information about the llvm-branch-commits
mailing list