[llvm] 85601fd - AMDGPU: Handle v_mfma_f64_16x16x4_f64 write VGPR read srca/srcb hazard change for gfx950 (#117284)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 22 20:30:09 PST 2024
Author: Matt Arsenault
Date: 2024-11-22T20:30:06-08:00
New Revision: 85601fd78f4cbf0ce5df74c5926183035f859572
URL: https://github.com/llvm/llvm-project/commit/85601fd78f4cbf0ce5df74c5926183035f859572
DIFF: https://github.com/llvm/llvm-project/commit/85601fd78f4cbf0ce5df74c5926183035f859572.diff
LOG: AMDGPU: Handle v_mfma_f64_16x16x4_f64 write VGPR read srca/srcb hazard change for gfx950 (#117284)
On gfx950 the required wait states increase from 11 to 19. The index operand of smfmac is treated the same as srcA/srcB.
Added:
Modified:
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 4a4c9788b3d881..218f487f7e12ce 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -2309,6 +2309,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
const int SMFMA32x32WritesVGPROverlappedSrcABWaitStates = 19;
const int DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates = 6;
const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11;
+ const int GFX950_DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 19;
const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4;
const int GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2;
const int MaxWaitStates = 19;
@@ -2414,7 +2415,10 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64:
case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64:
case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64:
- NeedWaitStates = DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates;
+ NeedWaitStates =
+ ST.hasGFX950Insts()
+ ? GFX950_DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates
+ : DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates;
break;
case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64:
diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
index 1499fd4907a181..2ba873f55a1eb0 100644
--- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
+++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
@@ -536,8 +536,12 @@ body: |
...
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_srca_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 2
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_vgpr_mfma_srca_read_overlap
body: |
@@ -562,7 +566,7 @@ body: |
# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 7
-# GFX950-NEXT: S_NOP 0
+# GFX950-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
body: |
@@ -632,8 +636,12 @@ body: |
...
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_srcb_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 2
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_vgpr_mfma_srcb_read_overlap
body: |
@@ -643,8 +651,12 @@ body: |
...
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_smfmac_srcb_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 2
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 2
# GCN-NEXT: V_SMFMAC
name: dgemm16x16_mfma_write_vgpr_smfmac_srcb_read_overlap
body: |
@@ -654,8 +666,13 @@ body: |
...
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_smfmac_srcc_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 2
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 2
+
# GCN-NEXT: V_SMFMAC
name: dgemm16x16_mfma_write_vgpr_smfmac_srcc_read_overlap
body: |
@@ -1452,8 +1469,12 @@ body: |
...
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 2
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
body: |
@@ -1473,8 +1494,13 @@ body: |
...
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_srca_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 2
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 2
+
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_agpr_sgemm_mfma_srca_read_overlap
body: |
@@ -1504,8 +1530,12 @@ body: |
...
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_srcb_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 2
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_agpr_mfma_srcb_read_overlap
body: |
More information about the llvm-commits mailing list