[llvm-branch-commits] [llvm] release/20.x: AMDGPU: Handle gfx950 XDL Write-VGPR-VALU-WAW wait state change (#126132) (PR #126847)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Feb 18 21:55:14 PST 2025


https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/126847

>From e503227bc57625a0a22b450f5bd3e78df96ca4fe Mon Sep 17 00:00:00 2001
From: Vigneshwar Jayakumar <vigneshwar.jayakumar at amd.com>
Date: Tue, 11 Feb 2025 12:32:23 -0600
Subject: [PATCH] AMDGPU: Handle gfx950 XDL Write-VGPR-VALU-WAW wait state
 change (#126132)

gfx950 requires one more wait state than gfx940 for the XDL write-VGPR VALU
WAW hazard when the XDL op takes 4, 8, or 16 passes; the 2-pass case is
unchanged.

(cherry picked from commit 1188b1ff7b956cb65d8ddda5f1e56c432f1a57c7)
---
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 17 +++++++------
 .../CodeGen/AMDGPU/mai-hazards-gfx940.mir     | 24 ++++++++++++-------
 2 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 537181710ed32..646663a92e5e8 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -2605,12 +2605,14 @@ static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {
   return NumPasses + 2;
 }
 
-static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {
-  // 2 pass -> 5
-  // 4 pass -> 7
-  // 8 pass -> 11
-  // 16 pass -> 19
-  return NumPasses + 3;
+static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses,
+                                                       bool IsGFX950) {
+  // xdl def cycles | gfx940 | gfx950
+  // 2 pass         |  5        5
+  // 4 pass         |  7        8
+  // 8 pass         |  11       12
+  // 16 pass        |  19       20
+  return NumPasses + 3 + (NumPasses != 2 && IsGFX950);
 }
 
 static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses,
@@ -2858,7 +2860,8 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
       } else if (ST.hasGFX940Insts()) {
         NeedWaitStates =
             isXDL(ST, *MFMA)
-                ? GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(NumPasses)
+                ? GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(
+                      NumPasses, ST.hasGFX950Insts())
                 : GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(NumPasses);
       } else {
         switch (NumPasses) {
diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
index ef30c9a44b2b5..0af37ad8c896e 100644
--- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
+++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
@@ -958,7 +958,8 @@ body:             |
 # GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_write
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 2
+# GFX950-NEXT: S_NOP 3
 # GCN-NEXT: V_MOV_B32
 name:            xdl_smfma16x16_write_vgpr_valu_write
 body:             |
@@ -970,7 +971,8 @@ body:             |
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 2
+# GFX950-NEXT: S_NOP 3
 # GCN-NEXT: V_MOV_B32
 name:            xdl_smfma32x32_write_vgpr_valu_write
 body:             |
@@ -991,7 +993,8 @@ body:             |
 # GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_f16_write
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 2
+# GFX950-NEXT: S_NOP 3
 # GCN-NEXT: V_FMA_F16_e64
 name:            xdl_smfma16x16_write_vgpr_valu_f16_write
 body:             |
@@ -1003,7 +1006,8 @@ body:             |
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 2
+# GFX950-NEXT: S_NOP 3
 # GCN-NEXT: V_FMA_F16_e64
 name:            xdl_smfma32x32_write_vgpr_valu_f16_write
 body:             |
@@ -1024,7 +1028,8 @@ body:             |
 # GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_sdwa_write
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 2
+# GFX950-NEXT: S_NOP 3
 # GCN-NEXT: V_MOV_B32_sdwa
 name:            xdl_smfma16x16_write_vgpr_valu_sdwa_write
 body:             |
@@ -1761,7 +1766,8 @@ body:             |
 ...
 # GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_write
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 6
+# GFX940-NEXT: S_NOP 6
+# GFX950-NEXT: S_NOP 7
 # GCN-NEXT: V_MOV_B32
 name:            xdl_sgemm16X16X16_mfma_write_vgpr_valu_write
 body:             |
@@ -2072,7 +2078,8 @@ body:             |
 ...
 # GCN-LABEL: name: smfmac16x16_read_vgpr_srcc_valu_write
 # GCN:      V_SMFMAC
-# GCN-NEXT: S_NOP 6
+# GFX940-NEXT: S_NOP 6
+# GFX950-NEXT: S_NOP 7
 # GCN-NEXT: V_MOV_B32
 name:            smfmac16x16_read_vgpr_srcc_valu_write
 body:             |
@@ -2102,7 +2109,8 @@ body:             |
 # GCN-LABEL: name: smfmac32x32_read_vgpr_srcc_valu_write
 # GCN:      V_SMFMAC
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 2
+# GFX950-NEXT: S_NOP 3
 # GCN-NEXT: V_MOV_B32
 name:            smfmac32x32_read_vgpr_srcc_valu_write
 body:             |



More information about the llvm-branch-commits mailing list