[llvm-branch-commits] [llvm] AMDGPU: Refine gfx950 xdl-write-vgpr hazard cases (PR #117285)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Nov 22 12:17:02 PST 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/117285
>From ccc271528980d20517142d65373be6db85d0447b Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 12 Mar 2024 13:29:05 +0530
Subject: [PATCH] AMDGPU: Refine gfx950 xdl-write-vgpr hazard cases
The case where a 2-pass XDL instruction writes a VGPR that is then read
as SrcC by a non-XDL SGEMM/DGEMM was overly conservative by 1 wait state.
Previously, for gfx940, the XDL and non-XDL consumer cases happened to
require the same number of wait states for every pass count. Now, on gfx950,
the XDL consumer case requires one additional wait state for 2-pass sources.
---
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 22 +++++++++++++++----
.../CodeGen/AMDGPU/mai-hazards-gfx940.mir | 15 +++++--------
2 files changed, 23 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 218f487f7e12ce..8008b5f7bcc991 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -2232,8 +2232,8 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) {
}
static int
-GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses,
- bool IsGFX950) {
+GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(int NumPasses,
+ bool IsGFX950) {
// xdl def cycles | gfx940 | gfx950
// 2 pass | 3 4
// 4 pass | 5 6
@@ -2242,6 +2242,17 @@ GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses,
return NumPasses + 1 + IsGFX950;
}
+static int
+GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(int NumPasses,
+ bool IsGFX950) {
+ // xdl def cycles | gfx940 | gfx950
+ // 2 pass | 3 3
+ // 4 pass | 5 6
+ // 8 pass | 9 10
+ // 16 pass | 17 18
+ return NumPasses + 1 + (NumPasses != 2 && IsGFX950);
+}
+
static int
GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) {
// 2 pass -> 2
@@ -2379,8 +2390,11 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
NeedWaitStates =
isXDL(ST, *MI1)
- ? GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(
- NumPasses, ST.hasGFX950Insts())
+ ? (isXDL(ST, *MI)
+ ? GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(
+ NumPasses, ST.hasGFX950Insts())
+ : GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(
+ NumPasses, ST.hasGFX950Insts()))
: GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(
NumPasses);
break;
diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
index 2ba873f55a1eb0..d59bcfb16eece2 100644
--- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
+++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
@@ -156,8 +156,7 @@ body: |
...
# GCN-LABEL: name: sgemm4x4_mfma_write_vgpr_mfma_read_overlap
# GCN: V_MFMA
-# GFX940-NEXT: S_NOP 2
-# GFX950-NEXT: S_NOP 3
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: sgemm4x4_mfma_write_vgpr_mfma_read_overlap
body: |
@@ -348,8 +347,7 @@ body: |
...
# GCN-LABEL: name: sgemm4x4_mfma_write_vgpr_dgemm_mfma_read_overlap
# GCN: V_MFMA
-# GFX940-NEXT: S_NOP 2
-# GFX950-NEXT: S_NOP 3
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: sgemm4x4_mfma_write_vgpr_dgemm_mfma_read_overlap
body: |
@@ -1403,8 +1401,7 @@ body: |
...
# GCN-LABEL: name: sgemm4x4_mfma_write_agpr_dgemm_mfma_read_overlap
# GCN: V_MFMA
-# GFX940-NEXT: S_NOP 2
-# GFX950-NEXT: S_NOP 3
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: sgemm4x4_mfma_write_agpr_dgemm_mfma_read_overlap
body: |
@@ -1885,8 +1882,7 @@ body: |
...
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_mfma_read_overlap
# GCN: V_MFMA
-# GFX940-NEXT: S_NOP 2
-# GFX950-NEXT: S_NOP 3
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: xdl_sgemm4x4_mfma_write_agpr_mfma_read_overlap
body: |
@@ -2220,8 +2216,7 @@ body: |
# 2 pass source
# GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcc
# GCN: V_MFMA
-# GFX940-NEXT: S_NOP 2
-# GFX950-NEXT: S_NOP 3
+# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcc
body: |
More information about the llvm-branch-commits
mailing list