[llvm-branch-commits] [llvm] AMDGPU: Handle v_mfma_f64_16x16x4_f64 write VGPR read srca/srcb hazard change for gfx950 (PR #117284)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Nov 22 12:16:21 PST 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/117284
>From 26c86acea9e387b14e081f11ebb09bea004ee694 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 7 Mar 2024 15:18:59 +0530
Subject: [PATCH] AMDGPU: Handle v_mfma_f64_16x16x4_f64 write VGPR read
srca/srcb hazard change for gfx950
On gfx950, increase the required wait states for this hazard from 11 to 19. The index operand of smfmac is treated like srcA/srcB.
---
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 6 +-
.../CodeGen/AMDGPU/mai-hazards-gfx940.mir | 60 ++++++++++++++-----
2 files changed, 50 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 4a4c9788b3d881..218f487f7e12ce 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -2309,6 +2309,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
const int SMFMA32x32WritesVGPROverlappedSrcABWaitStates = 19;
const int DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates = 6;
const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11;
+ const int GFX950_DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 19;
const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4;
const int GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2;
const int MaxWaitStates = 19;
@@ -2414,7 +2415,10 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64:
case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64:
case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64:
- NeedWaitStates = DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates;
+ NeedWaitStates =
+ ST.hasGFX950Insts()
+ ? GFX950_DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates
+ : DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates;
break;
case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64:
diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
index 1499fd4907a181..2ba873f55a1eb0 100644
--- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
+++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
@@ -536,8 +536,12 @@ body: |
...
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_srca_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 2
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_vgpr_mfma_srca_read_overlap
body: |
@@ -562,7 +566,7 @@ body: |
# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 7
-# GFX950-NEXT: S_NOP 0
+# GFX950-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
body: |
@@ -632,8 +636,12 @@ body: |
...
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_srcb_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 2
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_vgpr_mfma_srcb_read_overlap
body: |
@@ -643,8 +651,12 @@ body: |
...
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_smfmac_srcb_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 2
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 2
# GCN-NEXT: V_SMFMAC
name: dgemm16x16_mfma_write_vgpr_smfmac_srcb_read_overlap
body: |
@@ -654,8 +666,13 @@ body: |
...
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_smfmac_srcc_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 2
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 2
+
# GCN-NEXT: V_SMFMAC
name: dgemm16x16_mfma_write_vgpr_smfmac_srcc_read_overlap
body: |
@@ -1452,8 +1469,12 @@ body: |
...
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 2
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
body: |
@@ -1473,8 +1494,13 @@ body: |
...
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_srca_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 2
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 2
+
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_agpr_sgemm_mfma_srca_read_overlap
body: |
@@ -1504,8 +1530,12 @@ body: |
...
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_srcb_read_overlap
# GCN: V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 2
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_agpr_mfma_srcb_read_overlap
body: |
More information about the llvm-branch-commits
mailing list