[llvm] 1188b1f - AMDGPU: Handle gfx950 XDL Write-VGPR-VALU-WAW wait state change (#126132)

via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 11 10:32:27 PST 2025


Author: Vigneshwar Jayakumar
Date: 2025-02-12T01:32:23+07:00
New Revision: 1188b1ff7b956cb65d8ddda5f1e56c432f1a57c7

URL: https://github.com/llvm/llvm-project/commit/1188b1ff7b956cb65d8ddda5f1e56c432f1a57c7
DIFF: https://github.com/llvm/llvm-project/commit/1188b1ff7b956cb65d8ddda5f1e56c432f1a57c7.diff

LOG: AMDGPU: Handle gfx950 XDL Write-VGPR-VALU-WAW wait state change (#126132)

On gfx950 the XDL write-VGPR VALU write-after-write (WAW) hazard needs one
more wait state than on gfx940, except for 2-pass XDL ops, which are unchanged.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
    llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 36b9003a0ee65..b0f087737afa7 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -2598,12 +2598,14 @@ static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {
   return NumPasses + 2;
 }
 
-static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {
-  // 2 pass -> 5
-  // 4 pass -> 7
-  // 8 pass -> 11
-  // 16 pass -> 19
-  return NumPasses + 3;
+static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses,
+                                                       bool IsGFX950) {
+  // xdl def cycles | gfx940 | gfx950
+  // 2 pass         |  5        5
+  // 4 pass         |  7        8
+  // 8 pass         |  11       12
+  // 16 pass        |  19       20
+  return NumPasses + 3 + (NumPasses != 2 && IsGFX950);
 }
 
 static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses,
@@ -2851,7 +2853,8 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
       } else if (ST.hasGFX940Insts()) {
         NeedWaitStates =
             isXDL(ST, *MFMA)
-                ? GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(NumPasses)
+                ? GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(
+                      NumPasses, ST.hasGFX950Insts())
                 : GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(NumPasses);
       } else {
         switch (NumPasses) {

diff  --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
index ef30c9a44b2b5..0af37ad8c896e 100644
--- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
+++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
@@ -958,7 +958,8 @@ body:             |
 # GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_write
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 2
+# GFX950-NEXT: S_NOP 3
 # GCN-NEXT: V_MOV_B32
 name:            xdl_smfma16x16_write_vgpr_valu_write
 body:             |
@@ -970,7 +971,8 @@ body:             |
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 2
+# GFX950-NEXT: S_NOP 3
 # GCN-NEXT: V_MOV_B32
 name:            xdl_smfma32x32_write_vgpr_valu_write
 body:             |
@@ -991,7 +993,8 @@ body:             |
 # GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_f16_write
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 2
+# GFX950-NEXT: S_NOP 3
 # GCN-NEXT: V_FMA_F16_e64
 name:            xdl_smfma16x16_write_vgpr_valu_f16_write
 body:             |
@@ -1003,7 +1006,8 @@ body:             |
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 2
+# GFX950-NEXT: S_NOP 3
 # GCN-NEXT: V_FMA_F16_e64
 name:            xdl_smfma32x32_write_vgpr_valu_f16_write
 body:             |
@@ -1024,7 +1028,8 @@ body:             |
 # GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_sdwa_write
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 2
+# GFX950-NEXT: S_NOP 3
 # GCN-NEXT: V_MOV_B32_sdwa
 name:            xdl_smfma16x16_write_vgpr_valu_sdwa_write
 body:             |
@@ -1761,7 +1766,8 @@ body:             |
 ...
 # GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_write
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 6
+# GFX940-NEXT: S_NOP 6
+# GFX950-NEXT: S_NOP 7
 # GCN-NEXT: V_MOV_B32
 name:            xdl_sgemm16X16X16_mfma_write_vgpr_valu_write
 body:             |
@@ -2072,7 +2078,8 @@ body:             |
 ...
 # GCN-LABEL: name: smfmac16x16_read_vgpr_srcc_valu_write
 # GCN:      V_SMFMAC
-# GCN-NEXT: S_NOP 6
+# GFX940-NEXT: S_NOP 6
+# GFX950-NEXT: S_NOP 7
 # GCN-NEXT: V_MOV_B32
 name:            smfmac16x16_read_vgpr_srcc_valu_write
 body:             |
@@ -2102,7 +2109,8 @@ body:             |
 # GCN-LABEL: name: smfmac32x32_read_vgpr_srcc_valu_write
 # GCN:      V_SMFMAC
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 2
+# GFX950-NEXT: S_NOP 3
 # GCN-NEXT: V_MOV_B32
 name:            smfmac32x32_read_vgpr_srcc_valu_write
 body:             |


        


More information about the llvm-commits mailing list