[llvm-branch-commits] [llvm] AMDGPU: Handle gfx950 XDL-write-overlapped-smfma-src-c wait state change (PR #117263)

Matt Arsenault via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Nov 21 17:13:47 PST 2024


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/117263

>From 736d914241979efb46b506fb45cee79e73bbd20e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 6 Mar 2024 19:51:00 +0530
Subject: [PATCH] AMDGPU: Handle gfx950 XDL-write-overlapped-smfma-src-c wait
 state change

These have an additional wait state compared to gfx940.
---
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp |  16 ++-
 .../CodeGen/AMDGPU/mai-hazards-gfx940.mir     | 129 ++++++++++++------
 .../AMDGPU/mai-hazards-mfma-scale.gfx950.mir  |  22 +--
 3 files changed, 107 insertions(+), 60 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 99a176731599cc..be0936ce74835f 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -2232,12 +2232,14 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) {
 }
 
 static int
-GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) {
-  // 2 pass -> 3
-  // 4 pass -> 5
-  // 8 pass -> 9
-  // 16 pass -> 17
-  return NumPasses + 1;
+GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses,
+                                                         bool IsGFX950) {
+  // xdl def cycles | gfx940 | gfx950
+  // 2 pass         |  3        4
+  // 4 pass         |  5        6
+  // 8 pass         |  9        10
+  // 16 pass        |  17       18
+  return NumPasses + 1 + IsGFX950;
 }
 
 static int
@@ -2373,7 +2375,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
             NeedWaitStates =
                 isXDL(ST, *MI1)
                     ? GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(
-                          NumPasses)
+                          NumPasses, ST.hasGFX950Insts())
                     : GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(
                           NumPasses);
             break;
diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
index d2b2f226404da8..b9135dbd46fc1f 100644
--- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
+++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
@@ -145,7 +145,8 @@ body:             |
 ...
 # GCN-LABEL: name: sgemm4x4_mfma_write_agpr_mfma_read_overlap
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 2
+# GFX950-NEXT: S_NOP 3
 # GCN-NEXT: V_MFMA
 name:            sgemm4x4_mfma_write_agpr_mfma_read_overlap
 body:             |
@@ -155,7 +156,8 @@ body:             |
 ...
 # GCN-LABEL: name: sgemm4x4_mfma_write_vgpr_mfma_read_overlap
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 2
+# GFX950-NEXT: S_NOP 3
 # GCN-NEXT: V_MFMA
 name:            sgemm4x4_mfma_write_vgpr_mfma_read_overlap
 body:             |
@@ -165,7 +167,8 @@ body:             |
 ...
 # GCN-LABEL: name: sgemm4x4_mfma_write_agpr_smfmac_read_overlap
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 2
+# GFX950-NEXT: S_NOP 3
 # GCN-NEXT: V_SMFMAC
 name:            sgemm4x4_mfma_write_agpr_smfmac_read_overlap
 body:             |
@@ -175,8 +178,11 @@ body:             |
 ...
 # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_overlap
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 0
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 1
 # GCN-NEXT: V_MFMA
 name:            xdl_sgemm16x16_mfma_write_agpr_mfma_read_overlap
 body:             |
@@ -186,8 +192,11 @@ body:             |
 ...
 # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_overlap
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 0
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 1
 # GCN-NEXT: V_MFMA
 name:            xdl_sgemm16x16_mfma_write_vgpr_mfma_read_overlap
 body:             |
@@ -216,8 +225,11 @@ body:             |
 ...
 # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_smfmac_read_overlap
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 7
+# GFX940-NEXT: S_NOP 0
+
+# GFX950-NEXT: S_NOP 7
+# GFX950-NEXT: S_NOP 1
 # GCN-NEXT: V_SMFMAC
 name:            xdl_sgemm16x16_mfma_write_agpr_smfmac_read_overlap
 body:             |
@@ -229,7 +241,8 @@ body:             |
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 0
+# GFX950-NEXT: S_NOP 1
 # GCN-NEXT: V_MFMA
 name:            xdl_sgemm32x32_mfma_write_agpr_mfma_read_overlap
 body:             |
@@ -241,7 +254,8 @@ body:             |
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 0
+# GFX950-NEXT: S_NOP 1
 # GCN-NEXT: V_MFMA
 name:            xdl_sgemm32x32_mfma_write_vgpr_mfma_read_overlap
 body:             |
@@ -273,7 +287,8 @@ body:             |
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 0
+# GFX950-NEXT: S_NOP 1
 # GCN-NEXT: V_SMFMAC
 name:            xdl_sgemm32x32_mfma_write_agpr_smfmac_read_overlap
 body:             |
@@ -325,7 +340,8 @@ body:             |
 ...
 # GCN-LABEL: name: sgemm4x4_mfma_write_vgpr_dgemm_mfma_read_overlap
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 2
+# GFX950-NEXT: S_NOP 3
 # GCN-NEXT: V_MFMA
 name:            sgemm4x4_mfma_write_vgpr_dgemm_mfma_read_overlap
 body:             |
@@ -336,7 +352,8 @@ body:             |
 # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_dgemm_mfma_read_overlap
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 0
+# GFX950-NEXT: S_NOP 1
 # GCN-NEXT: V_MFMA
 name:            xdl_sgemm16x16_mfma_write_vgpr_dgemm_mfma_read_overlap
 body:             |
@@ -348,7 +365,8 @@ body:             |
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 0
+# GFX950-NEXT: S_NOP 1
 # GCN-NEXT: V_MFMA
 name:            xdl_sgemm32x32_mfma_write_vgpr_dgemm_mfma_read_overlap
 body:             |
@@ -359,7 +377,8 @@ body:             |
 # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_partial
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 0
+# GFX950-NEXT: S_NOP 1
 # GCN-NEXT: V_MFMA
 name:            xdl_sgemm16x16_mfma_write_agpr_mfma_read_partial
 body:             |
@@ -370,7 +389,8 @@ body:             |
 # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_partial
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 0
+# GFX950-NEXT: S_NOP 1
 # GCN-NEXT: V_MFMA
 name:            xdl_sgemm16x16_mfma_write_vgpr_mfma_read_partial
 body:             |
@@ -1345,7 +1365,8 @@ body:             |
 ...
 # GCN-LABEL: name: sgemm4x4_mfma_write_agpr_dgemm_mfma_read_overlap
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 2
+# GFX950-NEXT: S_NOP 3
 # GCN-NEXT: V_MFMA
 name:            sgemm4x4_mfma_write_agpr_dgemm_mfma_read_overlap
 body:             |
@@ -1356,7 +1377,8 @@ body:             |
 # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_sgpr_dgemm_mfma_read_overlap
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 0
+# GFX950-NEXT: S_NOP 1
 # GCN-NEXT: V_MFMA
 name:            xdl_sgemm16x16_mfma_write_sgpr_dgemm_mfma_read_overlap
 body:             |
@@ -1368,7 +1390,8 @@ body:             |
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 0
+# GFX950-NEXT: S_NOP 1
 # GCN-NEXT: V_MFMA
 name:            xdl_sgemm32x32_mfma_write_agpr_dgemm_mfma_read_overlap
 body:             |
@@ -1589,7 +1612,8 @@ body:             |
 ...
 # GCN-LABEL: name: sgemm16X16X16_mfma_write_agpr_mfma_read_overlap
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 4
+# GFX940-NEXT: S_NOP 4
+# GFX950-NEXT: S_NOP 5
 # GCN-NEXT: V_MFMA
 name:            sgemm16X16X16_mfma_write_agpr_mfma_read_overlap
 body:             |
@@ -1599,7 +1623,8 @@ body:             |
 ...
 # GCN-LABEL: name: sgemm16X16X32_mfma_write_agpr_mfma_read_overlap
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 4
+# GFX940-NEXT: S_NOP 4
+# GFX950-NEXT: S_NOP 5
 # GCN-NEXT: V_MFMA
 name:            sgemm16X16X32_mfma_write_agpr_mfma_read_overlap
 body:             |
@@ -1609,7 +1634,8 @@ body:             |
 ...
 # GCN-LABEL: name: sgemm16X16X16_mfma_write_agpr_dgemm_read_overlap
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 4
+# GFX940-NEXT: S_NOP 4
+# GFX950-NEXT: S_NOP 5
 # GCN-NEXT: V_MFMA
 name:            sgemm16X16X16_mfma_write_agpr_dgemm_read_overlap
 body:             |
@@ -1619,7 +1645,8 @@ body:             |
 ...
 # GCN-LABEL: name: sgemm16X16X16_mfma_write_agpr_smfmac_read_overlap
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 4
+# GFX940-NEXT: S_NOP 4
+# GFX950-NEXT: S_NOP 5
 # GCN-NEXT: V_SMFMAC
 name:            sgemm16X16X16_mfma_write_agpr_smfmac_read_overlap
 body:             |
@@ -1629,7 +1656,8 @@ body:             |
 ...
 # GCN-LABEL: name: smfmac16x16_write_agpr_smfmac_read_overlap
 # GCN:      V_SMFMAC
-# GCN-NEXT: S_NOP 4
+# GFX940-NEXT: S_NOP 4
+# GFX950-NEXT: S_NOP 5
 # GCN-NEXT: V_SMFMAC
 name:            smfmac16x16_write_agpr_smfmac_read_overlap
 body:             |
@@ -1727,7 +1755,8 @@ body:             |
 ...
 # GCN-LABEL: name: smfmac16x16x32_mfma_write_agpr_mfma_read_overlap
 # GCN:      V_SMFMAC
-# GCN-NEXT: S_NOP 4
+# GFX940-NEXT: S_NOP 4
+# GFX950-NEXT: S_NOP 5
 # GCN-NEXT: V_SMFMAC
 name:            smfmac16x16x32_mfma_write_agpr_mfma_read_overlap
 body:             |
@@ -1738,7 +1767,8 @@ body:             |
 # GCN-LABEL: name: smfmac32x32x32_mfma_write_agpr_mfma_read_overlap
 # GCN:      V_SMFMAC
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 0
+# GFX950-NEXT: S_NOP 1
 # GCN-NEXT: V_SMFMAC
 name:            smfmac32x32x32_mfma_write_agpr_mfma_read_overlap
 body:             |
@@ -1804,7 +1834,8 @@ body:             |
 ...
 # GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_mfma_read_overlap
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 2
+# GFX950-NEXT: S_NOP 3
 # GCN-NEXT: V_MFMA
 name:            xdl_sgemm4x4_mfma_write_agpr_mfma_read_overlap
 body:             |
@@ -1923,7 +1954,8 @@ body:             |
 ...
 # GCN-LABEL: name: xdl_sgemm16x16_4pass_mfma_write_agpr_mfma_read_overlap
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 4
+# GFX940-NEXT: S_NOP 4
+# GFX950-NEXT: S_NOP 5
 # GCN-NEXT: V_MFMA
 name:            xdl_sgemm16x16_4pass_mfma_write_agpr_mfma_read_overlap
 body:             |
@@ -1933,7 +1965,8 @@ body:             |
 ...
 # GCN-LABEL: name: smfmac16x16_mfma_write_agpr_mfma_read_overlap
 # GCN:      V_SMFMAC
-# GCN-NEXT: S_NOP 4
+# GFX940-NEXT: S_NOP 4
+# GFX950-NEXT: S_NOP 5
 # GCN-NEXT: V_MFMA
 name:            smfmac16x16_mfma_write_agpr_mfma_read_overlap
 body:             |
@@ -2047,7 +2080,8 @@ body:             |
 # 2 pass source
 # GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srcc
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 2
+# GFX950-NEXT: S_NOP 3
 # GCN-NEXT: V_MFMA
 name:            xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srcc
 body:             |
@@ -2092,7 +2126,8 @@ body:             |
 # 4 pass source
 # GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcc
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 4
+# GFX940-NEXT: S_NOP 4
+# GFX950-NEXT: S_NOP 5
 # GCN-NEXT: V_MFMA
 name:            xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcc
 body:             |
@@ -2134,7 +2169,8 @@ body:             |
 # 2 pass source
 # GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcc
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 2
+# GFX950-NEXT: S_NOP 3
 # GCN-NEXT: V_MFMA
 name:            xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcc
 body:             |
@@ -2179,7 +2215,8 @@ body:             |
 # 4 pass source
 # GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcc
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 4
+# GFX940-NEXT: S_NOP 4
+# GFX950-NEXT: S_NOP 5
 # GCN-NEXT: V_MFMA
 name:            xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcc
 body:             |
@@ -2222,7 +2259,8 @@ body:             |
 # GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 0
+# GFX950-NEXT: S_NOP 1
 # GCN-NEXT: V_MFMA
 name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc
 body:             |
@@ -2268,7 +2306,8 @@ body:             |
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 0
+# GFX950-NEXT: S_NOP 1
 # GCN-NEXT: V_MFMA
 name:            xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc
 body:             |
@@ -2356,7 +2395,8 @@ body:             |
 # GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcc
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 0
+# GFX950-NEXT: S_NOP 1
 # GCN-NEXT: V_MFMA
 name:            xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcc
 body:             |
@@ -2399,7 +2439,8 @@ body:             |
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 0
+# GFX950-NEXT: S_NOP 1
 # GCN-NEXT: V_MFMA
 name:            xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srcc
 body:             |
@@ -2446,7 +2487,8 @@ body:             |
 # 2 pass source
 # GCN-LABEL: name: xdl_mfma_2pass_write_agpr_smfmac_read_overlap_srcc
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 2
+# GFX940-NEXT: S_NOP 2
+# GFX950-NEXT: S_NOP 3
 # GCN-NEXT: V_SMFMAC_
 name:            xdl_mfma_2pass_write_agpr_smfmac_read_overlap_srcc
 body:             |
@@ -2460,7 +2502,8 @@ body:             |
 ...
 # GCN-LABEL: name: xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc
 # GCN:      V_MFMA
-# GCN-NEXT: S_NOP 4
+# GFX940: S_NOP 4
+# GFX950: S_NOP 5
 # GCN-NEXT: V_SMFMAC_
 name:            xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc
 body:             |
@@ -2474,7 +2517,8 @@ body:             |
 # GCN-LABEL: name: xdl_8pass_mfma_write_agpr_smfmac_read_overlap_srcc
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 0
+# GFX950-NEXT: S_NOP 1
 # GCN-NEXT: V_SMFMAC_
 name:            xdl_8pass_mfma_write_agpr_smfmac_read_overlap_srcc
 body:             |
@@ -2488,7 +2532,8 @@ body:             |
 # GCN:      V_MFMA
 # GCN-NEXT: S_NOP 7
 # GCN-NEXT: S_NOP 7
-# GCN-NEXT: S_NOP 0
+# GFX940-NEXT: S_NOP 0
+# GFX950-NEXT: S_NOP 1
 # GCN-NEXT: V_SMFMAC_
 name:            xdl_16pass_mfma_write_agpr_smfmac_read_overlap_srcc
 body:             |
diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-mfma-scale.gfx950.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-mfma-scale.gfx950.mir
index bf51efab2cdf12..f68b84c7140ba4 100644
--- a/llvm/test/CodeGen/AMDGPU/mai-hazards-mfma-scale.gfx950.mir
+++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-mfma-scale.gfx950.mir
@@ -16,7 +16,7 @@ body:             |
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
     ; GCN-NEXT: S_NOP 7
-    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: S_NOP 1
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
     ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
     renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
@@ -38,7 +38,7 @@ body:             |
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 1, 1, implicit $mode, implicit $exec
     ; GCN-NEXT: S_NOP 7
-    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: S_NOP 1
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
     ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
     renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 1, 1, implicit $mode, implicit $exec
@@ -60,7 +60,7 @@ body:             |
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 2, 0, implicit $mode, implicit $exec
     ; GCN-NEXT: S_NOP 7
-    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: S_NOP 1
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
     ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
     renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 2, 0, implicit $mode, implicit $exec
@@ -82,7 +82,7 @@ body:             |
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 2, implicit $mode, implicit $exec
     ; GCN-NEXT: S_NOP 7
-    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: S_NOP 1
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
     ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
     renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 2, implicit $mode, implicit $exec
@@ -102,7 +102,7 @@ body:             |
     ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 2, 2, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 4
+    ; GCN-NEXT: S_NOP 5
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
     ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
     renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 2, 2, implicit $mode, implicit $exec
@@ -122,7 +122,7 @@ body:             |
     ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 3, 3, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 4
+    ; GCN-NEXT: S_NOP 5
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
     ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
     renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 3, 3, implicit $mode, implicit $exec
@@ -142,7 +142,7 @@ body:             |
     ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 4, 4, implicit $mode, implicit $exec
-    ; GCN-NEXT: S_NOP 4
+    ; GCN-NEXT: S_NOP 5
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
     ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
     renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 4, 4, implicit $mode, implicit $exec
@@ -165,7 +165,7 @@ body:             |
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec
     ; GCN-NEXT: S_NOP 7
     ; GCN-NEXT: S_NOP 7
-    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: S_NOP 1
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $sgpr4, killed $vgpr32, 12, 4, implicit $mode, implicit $exec
     ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
     renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec
@@ -187,7 +187,7 @@ body:             |
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 2, 2, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec
     ; GCN-NEXT: S_NOP 7
-    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: S_NOP 1
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $sgpr4, killed $vgpr32, 12, 4, implicit $mode, implicit $exec
     ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
     renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 2, 2, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec
@@ -210,7 +210,7 @@ body:             |
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec
     ; GCN-NEXT: S_NOP 7
     ; GCN-NEXT: S_NOP 7
-    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: S_NOP 1
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec
     ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
     renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec
@@ -232,7 +232,7 @@ body:             |
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 2, 2, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec
     ; GCN-NEXT: S_NOP 7
-    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: S_NOP 1
     ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec
     ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
     renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 2, 2, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec



More information about the llvm-branch-commits mailing list