[clang] [llvm] [AMDGPU] Add WMMA and SWMMAC instructions for gfx1170 (PR #180731)

Tue Feb 10 04:11:19 PST 2026

https://github.com/mbrkusanin created https://github.com/llvm/llvm-project/pull/180731

Introduce two new subtarget features:

- WMMA256bInsts for GFX11 WMMA instructions and
- WMMA128bInsts for GFX1170 and GFX12 WMMA and SWMMAC instructions

Some WMMA instructions have changed from GFX 11.0 to GFX 11.7, so new
Real versions were added with a "_gfx1170" suffix. For consistency, all
WMMA and SWMMAC GFX 11.7 instructions use this suffix.

To resolve decoding issues caused by some WMMA instructions having
different formats on GFX 11 and GFX 11.7, new decoding tables were
added.


From 735330f9ac0b360437dfd9e3fbf6a7c6fedeee9b Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Tue, 10 Feb 2026 13:03:41 +0100
Subject: [PATCH] [AMDGPU] Add WMMA and SWMMAC instructions for gfx1170

Introduce two new subtarget features:

    WMMA256bInsts for GFX11 WMMA instructions and
    WMMA128bInsts for GFX1170 and GFX12 WMMA and SWMMAC instructions

Some WMMA instructions have changed from GFX 11.0 to GFX 11.7, so new
Real versions were added with a "_gfx1170" suffix. For consistency, all
WMMA and SWMMAC GFX 11.7 instructions use this suffix.

To resolve decoding issues caused by some WMMA instructions having
different formats on GFX 11 and GFX 11.7, new decoding tables were
added.
---
 clang/include/clang/Basic/BuiltinsAMDGPU.td   |  150 +-
 .../builtins-amdgcn-gfx12-wmma-w32.cl         |  141 +-
 .../builtins-amdgcn-gfx12-wmma-w64.cl         |  141 +-
 .../builtins-amdgcn-wmma-w32-gfx10-err.cl     |   16 +-
 .../builtins-amdgcn-wmma-w64-gfx10-err.cl     |   16 +-
 llvm/lib/Target/AMDGPU/AMDGPU.td              |   27 +-
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      |    2 +
 .../Disassembler/AMDGPUDisassembler.cpp       |   10 +
 .../AMDGPU/Disassembler/AMDGPUDisassembler.h  |    1 +
 llvm/lib/Target/AMDGPU/GCNSubtarget.h         |    4 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td         |    7 +-
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    |    4 +
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |    1 +
 llvm/lib/Target/AMDGPU/VOP3PInstructions.td   |  148 +-
 llvm/lib/TargetParser/TargetParser.cpp        |   30 +-
 ...wmma-gfx12-w32-f16-f32-matrix-modifiers.ll |  374 ++--
 .../AMDGPU/GlobalISel/wmma-gfx12-w32-imm.ll   |  598 +++---
 .../GlobalISel/wmma-gfx12-w32-iu-modifiers.ll |  258 +--
 .../wmma-gfx12-w32-swmmac-index_key.ll        |  183 +-
 .../AMDGPU/GlobalISel/wmma-gfx12-w32.ll       |  298 +--
 ...wmma-gfx12-w64-f16-f32-matrix-modifiers.ll |  291 +--
 .../AMDGPU/GlobalISel/wmma-gfx12-w64-imm.ll   |  442 ++---
 .../GlobalISel/wmma-gfx12-w64-iu-modifiers.ll |  186 +-
 .../wmma-gfx12-w64-swmmac-index_key.ll        |  266 ++-
 .../AMDGPU/GlobalISel/wmma-gfx12-w64.ll       |  226 +--
 ...wmma-gfx12-w32-f16-f32-matrix-modifiers.ll |  371 ++--
 .../test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll |  334 +++-
 .../AMDGPU/wmma-gfx12-w32-iu-modifiers.ll     |  258 +--
 .../AMDGPU/wmma-gfx12-w32-swmmac-index_key.ll |  183 +-
 llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32.ll    |  298 +--
 ...wmma-gfx12-w64-f16-f32-matrix-modifiers.ll |  303 +--
 .../test/CodeGen/AMDGPU/wmma-gfx12-w64-imm.ll |  334 ++--
 .../AMDGPU/wmma-gfx12-w64-iu-modifiers.ll     |  186 +-
 .../AMDGPU/wmma-gfx12-w64-swmmac-index_key.ll |  266 ++-
 llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64.ll    |  226 +--
 .../CodeGen/AMDGPU/wmma-hazards-gfx12-w32.mir |  237 +--
 .../CodeGen/AMDGPU/wmma-hazards-gfx12-w64.mir |  237 +--
 llvm/test/MC/AMDGPU/gfx1170_asm_wmma_w32.s    | 1529 ++++++++++++++++
 llvm/test/MC/AMDGPU/gfx1170_asm_wmma_w64.s    | 1529 ++++++++++++++++
 llvm/test/MC/AMDGPU/literals.s                |    8 +-
 .../AMDGPU/gfx1170_dasm_wmma_w32.txt          | 1628 +++++++++++++++++
 .../AMDGPU/gfx1170_dasm_wmma_w64.txt          | 1628 +++++++++++++++++
 42 files changed, 10513 insertions(+), 2862 deletions(-)
 create mode 100644 llvm/test/MC/AMDGPU/gfx1170_asm_wmma_w32.s
 create mode 100644 llvm/test/MC/AMDGPU/gfx1170_asm_wmma_w64.s
 create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_wmma_w32.txt
 create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_wmma_w64.txt

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.td b/clang/include/clang/Basic/BuiltinsAMDGPU.td
index b7839b2febcd3..6fb4dde13f6d2 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.td
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.td
@@ -346,23 +346,23 @@ def __builtin_amdgcn_s_wait_event : AMDGPUBuiltin<"void(_Constant short)", [], "
 // Postfix w32 indicates the builtin requires wavefront size of 32.
 // Postfix w64 indicates the builtin requires wavefront size of 64.
 //===----------------------------------------------------------------------===//
-def __builtin_amdgcn_wmma_f32_16x16x16_f16_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<16, _Float16>, _ExtVector<16, _Float16>, _ExtVector<8, float>)", [Const], "gfx11-insts,wavefrontsize32">;
-def __builtin_amdgcn_wmma_f32_16x16x16_bf16_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<16, short>, _ExtVector<16, short>, _ExtVector<8, float>)", [Const], "gfx11-insts,wavefrontsize32">;
-def __builtin_amdgcn_wmma_f16_16x16x16_f16_w32 : AMDGPUBuiltin<"_ExtVector<16, _Float16>(_ExtVector<16, _Float16>, _ExtVector<16, _Float16>, _ExtVector<16, _Float16>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize32">;
-def __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32 : AMDGPUBuiltin<"_ExtVector<16, short>(_ExtVector<16, short>, _ExtVector<16, short>, _ExtVector<16, short>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize32">;
-def __builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32 : AMDGPUBuiltin<"_ExtVector<16, _Float16>(_ExtVector<16, _Float16>, _ExtVector<16, _Float16>, _ExtVector<16, _Float16>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize32">;
-def __builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32 : AMDGPUBuiltin<"_ExtVector<16, short>(_ExtVector<16, short>, _ExtVector<16, short>, _ExtVector<16, short>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize32">;
-def __builtin_amdgcn_wmma_i32_16x16x16_iu8_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<4, int>, _Constant bool, _ExtVector<4, int>, _ExtVector<8, int>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize32">;
-def __builtin_amdgcn_wmma_i32_16x16x16_iu4_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant bool, _ExtVector<2, int>, _ExtVector<8, int>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize32">;
-
-def __builtin_amdgcn_wmma_f32_16x16x16_f16_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<16, _Float16>, _ExtVector<16, _Float16>, _ExtVector<4, float>)", [Const], "gfx11-insts,wavefrontsize64">;
-def __builtin_amdgcn_wmma_f32_16x16x16_bf16_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<16, short>, _ExtVector<16, short>, _ExtVector<4, float>)", [Const], "gfx11-insts,wavefrontsize64">;
-def __builtin_amdgcn_wmma_f16_16x16x16_f16_w64 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<16, _Float16>, _ExtVector<16, _Float16>, _ExtVector<8, _Float16>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize64">;
-def __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64 : AMDGPUBuiltin<"_ExtVector<8, short>(_ExtVector<16, short>, _ExtVector<16, short>, _ExtVector<8, short>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize64">;
-def __builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<16, _Float16>, _ExtVector<16, _Float16>, _ExtVector<8, _Float16>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize64">;
-def __builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64 : AMDGPUBuiltin<"_ExtVector<8, short>(_ExtVector<16, short>, _ExtVector<16, short>, _ExtVector<8, short>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize64">;
-def __builtin_amdgcn_wmma_i32_16x16x16_iu8_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, _ExtVector<4, int>, _Constant bool, _ExtVector<4, int>, _ExtVector<4, int>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize64">;
-def __builtin_amdgcn_wmma_i32_16x16x16_iu4_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, _ExtVector<2, int>, _Constant bool, _ExtVector<2, int>, _ExtVector<4, int>, _Constant bool)", [Const], "gfx11-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_f32_16x16x16_f16_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<16, _Float16>, _ExtVector<16, _Float16>, _ExtVector<8, float>)", [Const], "wmma-256b-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_16x16x16_bf16_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<16, short>, _ExtVector<16, short>, _ExtVector<8, float>)", [Const], "wmma-256b-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f16_16x16x16_f16_w32 : AMDGPUBuiltin<"_ExtVector<16, _Float16>(_ExtVector<16, _Float16>, _ExtVector<16, _Float16>, _ExtVector<16, _Float16>, _Constant bool)", [Const], "wmma-256b-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32 : AMDGPUBuiltin<"_ExtVector<16, short>(_ExtVector<16, short>, _ExtVector<16, short>, _ExtVector<16, short>, _Constant bool)", [Const], "wmma-256b-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32 : AMDGPUBuiltin<"_ExtVector<16, _Float16>(_ExtVector<16, _Float16>, _ExtVector<16, _Float16>, _ExtVector<16, _Float16>, _Constant bool)", [Const], "wmma-256b-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32 : AMDGPUBuiltin<"_ExtVector<16, short>(_ExtVector<16, short>, _ExtVector<16, short>, _ExtVector<16, short>, _Constant bool)", [Const], "wmma-256b-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_i32_16x16x16_iu8_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<4, int>, _Constant bool, _ExtVector<4, int>, _ExtVector<8, int>, _Constant bool)", [Const], "wmma-256b-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_i32_16x16x16_iu4_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant bool, _ExtVector<2, int>, _ExtVector<8, int>, _Constant bool)", [Const], "wmma-256b-insts,wavefrontsize32">;
+
+def __builtin_amdgcn_wmma_f32_16x16x16_f16_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<16, _Float16>, _ExtVector<16, _Float16>, _ExtVector<4, float>)", [Const], "wmma-256b-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_f32_16x16x16_bf16_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<16, short>, _ExtVector<16, short>, _ExtVector<4, float>)", [Const], "wmma-256b-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_f16_16x16x16_f16_w64 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<16, _Float16>, _ExtVector<16, _Float16>, _ExtVector<8, _Float16>, _Constant bool)", [Const], "wmma-256b-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64 : AMDGPUBuiltin<"_ExtVector<8, short>(_ExtVector<16, short>, _ExtVector<16, short>, _ExtVector<8, short>, _Constant bool)", [Const], "wmma-256b-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<16, _Float16>, _ExtVector<16, _Float16>, _ExtVector<8, _Float16>, _Constant bool)", [Const], "wmma-256b-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64 : AMDGPUBuiltin<"_ExtVector<8, short>(_ExtVector<16, short>, _ExtVector<16, short>, _ExtVector<8, short>, _Constant bool)", [Const], "wmma-256b-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_i32_16x16x16_iu8_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, _ExtVector<4, int>, _Constant bool, _ExtVector<4, int>, _ExtVector<4, int>, _Constant bool)", [Const], "wmma-256b-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_i32_16x16x16_iu4_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, _ExtVector<2, int>, _Constant bool, _ExtVector<2, int>, _ExtVector<4, int>, _Constant bool)", [Const], "wmma-256b-insts,wavefrontsize64">;
 
 def __builtin_amdgcn_s_sendmsg_rtn : AMDGPUBuiltin<"unsigned int(_Constant unsigned int)", [], "gfx11-insts">;
 def __builtin_amdgcn_s_sendmsg_rtnl : AMDGPUBuiltin<"uint64_t(_Constant unsigned int)", [], "gfx11-insts">;
@@ -587,67 +587,71 @@ def __builtin_amdgcn_ds_bvh_stack_push8_pop1_rtn : AMDGPUBuiltin<"_ExtVector<2,
 // The second return value of the intrinsic is zext'ed.
 def __builtin_amdgcn_ds_bvh_stack_push8_pop2_rtn : AMDGPUBuiltin<"_ExtVector<2, uint64_t>(unsigned int, unsigned int, _ExtVector<8, unsigned int>, _Constant int)", [], "gfx12-insts">;
 
+//===----------------------------------------------------------------------===//
+// GFX1170, GFX12+ only builtins.
+//===----------------------------------------------------------------------===//
+
 //===----------------------------------------------------------------------===//
 // WMMA builtins.
 // Postfix w32 indicates the builtin requires wavefront size of 32.
 // Postfix w64 indicates the builtin requires wavefront size of 64.
 //
-// Some of these are very similar to their GFX11 counterparts, but they don't
-// require replication of the A,B matrices, so they use fewer vector elements.
-// Therefore, we add an "_gfx12" suffix to distinguish them from the existing
-// builtins.
-//===----------------------------------------------------------------------===//
-def __builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, _Float16>, _ExtVector<8, _Float16>, _ExtVector<8, float>)", [Const], "gfx12-insts,wavefrontsize32">;
-def __builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, short>, _ExtVector<8, short>, _ExtVector<8, float>)", [Const], "gfx12-insts,wavefrontsize32">;
-def __builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, _Float16>, _ExtVector<8, _Float16>, _ExtVector<8, _Float16>)", [Const], "gfx12-insts,wavefrontsize32">;
-def __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, short>(_ExtVector<8, short>, _ExtVector<8, short>, _ExtVector<8, short>)", [Const], "gfx12-insts,wavefrontsize32">;
-def __builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant bool, _ExtVector<2, int>, _ExtVector<8, int>, _Constant bool)", [Const], "gfx12-insts,wavefrontsize32">;
-def __builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, int, _Constant bool, int, _ExtVector<8, int>, _Constant bool)", [Const], "gfx12-insts,wavefrontsize32">;
-// These are gfx12-only, but for consistency with the other WMMA variants we're
-// keeping the "_gfx12" suffix.
-def __builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<2, int>, _ExtVector<8, float>)", [Const], "gfx12-insts,wavefrontsize32">;
-def __builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<2, int>, _ExtVector<8, float>)", [Const], "gfx12-insts,wavefrontsize32">;
-def __builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<2, int>, _ExtVector<8, float>)", [Const], "gfx12-insts,wavefrontsize32">;
-def __builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<2, int>, _ExtVector<8, float>)", [Const], "gfx12-insts,wavefrontsize32">;
-def __builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant bool, _ExtVector<2, int>, _ExtVector<8, int>, _Constant bool)", [Const], "gfx12-insts,wavefrontsize32">;
-
-def __builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, _Float16>, _ExtVector<4, _Float16>, _ExtVector<4, float>)", [Const], "gfx12-insts,wavefrontsize64">;
-def __builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<4, float>)", [Const], "gfx12-insts,wavefrontsize64">;
-def __builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, _Float16>(_ExtVector<4, _Float16>, _ExtVector<4, _Float16>, _ExtVector<4, _Float16>)", [Const], "gfx12-insts,wavefrontsize64">;
-def __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, short>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<4, short>)", [Const], "gfx12-insts,wavefrontsize64">;
-def __builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, int, _ExtVector<4, int>, _Constant bool)", [Const], "gfx12-insts,wavefrontsize64">;
-def __builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, int, _ExtVector<4, int>, _Constant bool)", [Const], "gfx12-insts,wavefrontsize64">;
-// These are gfx12-only, but for consistency with the other WMMA variants we're
-// keeping the "_gfx12" suffix.
-def __builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, float>(int, int, _ExtVector<4, float>)", [Const], "gfx12-insts,wavefrontsize64">;
-def __builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, float>(int, int, _ExtVector<4, float>)", [Const], "gfx12-insts,wavefrontsize64">;
-def __builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, float>(int, int, _ExtVector<4, float>)", [Const], "gfx12-insts,wavefrontsize64">;
-def __builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, float>(int, int, _ExtVector<4, float>)", [Const], "gfx12-insts,wavefrontsize64">;
-def __builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, int, _ExtVector<4, int>, _Constant bool)", [Const], "gfx12-insts,wavefrontsize64">;
-
-def __builtin_amdgcn_swmmac_f32_16x16x32_f16_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, __fp16>, _ExtVector<16, __fp16>, _ExtVector<8, float>, int)", [Const], "gfx12-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, short>, _ExtVector<16, short>, _ExtVector<8, float>, int)", [Const], "gfx12-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f16_16x16x32_f16_w32 : AMDGPUBuiltin<"_ExtVector<8, __fp16>(_ExtVector<8, __fp16>, _ExtVector<16, __fp16>, _ExtVector<8, __fp16>, int)", [Const], "gfx12-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32 : AMDGPUBuiltin<"_ExtVector<8, short>(_ExtVector<8, short>, _ExtVector<16, short>, _ExtVector<8, short>, int)", [Const], "gfx12-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant bool, _ExtVector<4, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], "gfx12-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, int, _Constant bool, _ExtVector<2, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], "gfx12-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant bool, _ExtVector<4, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], "gfx12-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "gfx12-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "gfx12-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "gfx12-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "gfx12-insts,wavefrontsize32">;
-
-def __builtin_amdgcn_swmmac_f32_16x16x32_f16_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, __fp16>, _ExtVector<8, __fp16>, _ExtVector<4, float>, int)", [Const], "gfx12-insts,wavefrontsize64">;
-def __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, short>, _ExtVector<8, short>, _ExtVector<4, float>, int)", [Const], "gfx12-insts,wavefrontsize64">;
-def __builtin_amdgcn_swmmac_f16_16x16x32_f16_w64 : AMDGPUBuiltin<"_ExtVector<4, __fp16>(_ExtVector<4, __fp16>, _ExtVector<8, __fp16>, _ExtVector<4, __fp16>, int)", [Const], "gfx12-insts,wavefrontsize64">;
-def __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64 : AMDGPUBuiltin<"_ExtVector<4, short>(_ExtVector<4, short>, _ExtVector<8, short>, _ExtVector<4, short>, int)", [Const], "gfx12-insts,wavefrontsize64">;
-def __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, _ExtVector<2, int>, _ExtVector<4, int>, int, _Constant bool)", [Const], "gfx12-insts,wavefrontsize64">;
-def __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, int, _ExtVector<4, int>, int, _Constant bool)", [Const], "gfx12-insts,wavefrontsize64">;
-def __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, _ExtVector<2, int>, _ExtVector<4, int>, int, _Constant bool)", [Const], "gfx12-insts,wavefrontsize64">;
-def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "gfx12-insts,wavefrontsize64">;
-def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "gfx12-insts,wavefrontsize64">;
-def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "gfx12-insts,wavefrontsize64">;
-def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "gfx12-insts,wavefrontsize64">;
+// Some of these are very similar to their base GFX11 counterparts, but they
+// don't require replication of the A,B matrices, so they use fewer vector
+// elements. Therefore, we add an "_gfx12" suffix to distinguish them from the
+// existing builtins.
+//===----------------------------------------------------------------------===//
+def __builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, _Float16>, _ExtVector<8, _Float16>, _ExtVector<8, float>)", [Const], "wmma-128b-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, short>, _ExtVector<8, short>, _ExtVector<8, float>)", [Const], "wmma-128b-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, _Float16>, _ExtVector<8, _Float16>, _ExtVector<8, _Float16>)", [Const], "wmma-128b-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, short>(_ExtVector<8, short>, _ExtVector<8, short>, _ExtVector<8, short>)", [Const], "wmma-128b-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant bool, _ExtVector<2, int>, _ExtVector<8, int>, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, int, _Constant bool, int, _ExtVector<8, int>, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize32">;
+// These are gfx1170 and gfx12 only, but for consistency with the other WMMA
+// variants we're keeping the "_gfx12" suffix.
+def __builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<2, int>, _ExtVector<8, float>)", [Const], "wmma-128b-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<2, int>, _ExtVector<8, float>)", [Const], "wmma-128b-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<2, int>, _ExtVector<8, float>)", [Const], "wmma-128b-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<2, int>, _ExtVector<8, float>)", [Const], "wmma-128b-insts,wavefrontsize32">;
+def __builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant bool, _ExtVector<2, int>, _ExtVector<8, int>, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize32">;
+
+def __builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, _Float16>, _ExtVector<4, _Float16>, _ExtVector<4, float>)", [Const], "wmma-128b-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<4, float>)", [Const], "wmma-128b-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, _Float16>(_ExtVector<4, _Float16>, _ExtVector<4, _Float16>, _ExtVector<4, _Float16>)", [Const], "wmma-128b-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, short>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<4, short>)", [Const], "wmma-128b-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, int, _ExtVector<4, int>, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, int, _ExtVector<4, int>, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize64">;
+// These are gfx1170 and gfx12 only, but for consistency with the other WMMA
+// variants we're keeping the "_gfx12" suffix.
+def __builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, float>(int, int, _ExtVector<4, float>)", [Const], "wmma-128b-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, float>(int, int, _ExtVector<4, float>)", [Const], "wmma-128b-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, float>(int, int, _ExtVector<4, float>)", [Const], "wmma-128b-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, float>(int, int, _ExtVector<4, float>)", [Const], "wmma-128b-insts,wavefrontsize64">;
+def __builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, int, _ExtVector<4, int>, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize64">;
+
+def __builtin_amdgcn_swmmac_f32_16x16x32_f16_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, __fp16>, _ExtVector<16, __fp16>, _ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, short>, _ExtVector<16, short>, _ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f16_16x16x32_f16_w32 : AMDGPUBuiltin<"_ExtVector<8, __fp16>(_ExtVector<8, __fp16>, _ExtVector<16, __fp16>, _ExtVector<8, __fp16>, int)", [Const], "wmma-128b-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32 : AMDGPUBuiltin<"_ExtVector<8, short>(_ExtVector<8, short>, _ExtVector<16, short>, _ExtVector<8, short>, int)", [Const], "wmma-128b-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant bool, _ExtVector<4, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, int, _Constant bool, _ExtVector<2, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32 : AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant bool, _ExtVector<4, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32 : AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, _ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">;
+
+def __builtin_amdgcn_swmmac_f32_16x16x32_f16_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, __fp16>, _ExtVector<8, __fp16>, _ExtVector<4, float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, short>, _ExtVector<8, short>, _ExtVector<4, float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_f16_16x16x32_f16_w64 : AMDGPUBuiltin<"_ExtVector<4, __fp16>(_ExtVector<4, __fp16>, _ExtVector<8, __fp16>, _ExtVector<4, __fp16>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64 : AMDGPUBuiltin<"_ExtVector<4, short>(_ExtVector<4, short>, _ExtVector<8, short>, _ExtVector<4, short>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, _ExtVector<2, int>, _ExtVector<4, int>, int, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, int, _ExtVector<4, int>, int, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64 : AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, _ExtVector<2, int>, _ExtVector<4, int>, int, _Constant bool)", [Const], "wmma-128b-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64 : AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
 
 def __builtin_amdgcn_prng_b32 : AMDGPUBuiltin<"unsigned int(unsigned int)", [Const], "prng-inst">;
 def __builtin_amdgcn_cvt_scalef32_pk32_fp6_f16 : AMDGPUBuiltin<"_ExtVector<6, unsigned int>(_ExtVector<32, _Float16>, float)", [Const], "f16bf16-to-fp6bf6-cvt-scale-insts">;
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w32.cl
index 6326866ed3c35..47ae7ce82becf 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w32.cl
@@ -1,6 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
 // REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1200
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1170 -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck %s
 
 typedef int    v2i   __attribute__((ext_vector_type(2)));
 typedef float  v8f   __attribute__((ext_vector_type(8)));
@@ -14,12 +15,12 @@ typedef int    v8i   __attribute__((ext_vector_type(8)));
 // amdgcn_wmma_f32_16x16x16_f16
 //
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_f16_w32(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half> [[A]], <8 x half> [[B]], <8 x float> [[C]])
-// CHECK-GFX1200-NEXT:    store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8:![0-9]+]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_f16_w32(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half> [[A]], <8 x half> [[B]], <8 x float> [[C]])
+// CHECK-NEXT:    store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8:![0-9]+]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_f32_16x16x16_f16_w32(global v8f* out, v8h a, v8h b, v8f c)
 {
@@ -30,12 +31,12 @@ void test_amdgcn_wmma_f32_16x16x16_f16_w32(global v8f* out, v8h a, v8h b, v8f c)
 // amdgcn_wmma_f32_16x16x16_bf16
 //
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf16_w32(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16(<8 x i16> [[A]], <8 x i16> [[B]], <8 x float> [[C]])
-// CHECK-GFX1200-NEXT:    store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf16_w32(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16(<8 x i16> [[A]], <8 x i16> [[B]], <8 x float> [[C]])
+// CHECK-NEXT:    store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_f32_16x16x16_bf16_w32(global v8f* out, v8s a, v8s b, v8f c)
 {
@@ -46,12 +47,12 @@ void test_amdgcn_wmma_f32_16x16x16_bf16_w32(global v8f* out, v8s a, v8s b, v8f c
 // amdgcn_wmma_f16_16x16x16_f16
 //
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f16_16x16x16_f16_w32(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> [[A]], <8 x half> [[B]], <8 x half> [[C]], i1 false)
-// CHECK-GFX1200-NEXT:    store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_f16_16x16x16_f16_w32(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> [[A]], <8 x half> [[B]], <8 x half> [[C]], i1 false)
+// CHECK-NEXT:    store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_f16_16x16x16_f16_w32(global v8h* out, v8h a, v8h b, v8h c)
 {
@@ -62,12 +63,12 @@ void test_amdgcn_wmma_f16_16x16x16_f16_w32(global v8h* out, v8h a, v8h b, v8h c)
 // amdgcn_wmma_bf16_16x16x16_bf16
 //
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_bf16_16x16x16_bf16_w32(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v8i16(<8 x i16> [[A]], <8 x i16> [[B]], <8 x i16> [[C]], i1 false)
-// CHECK-GFX1200-NEXT:    store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_bf16_16x16x16_bf16_w32(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]], <8 x i16> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v8i16(<8 x i16> [[A]], <8 x i16> [[B]], <8 x i16> [[C]], i1 false)
+// CHECK-NEXT:    store <8 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_bf16_16x16x16_bf16_w32(global v8s* out, v8s a, v8s b, v8s c)
 {
@@ -78,12 +79,12 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_w32(global v8s* out, v8s a, v8s b, v8s
 // amdgcn_wmma_i32_16x16x16_iu8
 //
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu8_w32(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32(i1 true, <2 x i32> [[A]], i1 true, <2 x i32> [[B]], <8 x i32> [[C]], i1 false)
-// CHECK-GFX1200-NEXT:    store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu8_w32(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32(i1 true, <2 x i32> [[A]], i1 true, <2 x i32> [[B]], <8 x i32> [[C]], i1 false)
+// CHECK-NEXT:    store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_i32_16x16x16_iu8_w32(global v8i* out, v2i a, v2i b, v8i c)
 {
@@ -94,79 +95,79 @@ void test_amdgcn_wmma_i32_16x16x16_iu8_w32(global v8i* out, v2i a, v2i b, v8i c)
 // amdgcn_wmma_i32_16x16x16_iu4
 //
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu4_w32(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <8 x i32> [[C]], i1 false)
-// CHECK-GFX1200-NEXT:    store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu4_w32(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <8 x i32> [[C]], i1 false)
+// CHECK-NEXT:    store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_i32_16x16x16_iu4_w32(global v8i* out, int a, int b, v8i c)
 {
   *out = __builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12(true, a, true, b, c, false);
 }
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <8 x float> [[C]])
-// CHECK-GFX1200-NEXT:    store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <8 x float> [[C]])
+// CHECK-NEXT:    store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(global v8f* out, v2i a, v2i b, v8f c)
 {
   *out = __builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12(a, b, c);
 }
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <8 x float> [[C]])
-// CHECK-GFX1200-NEXT:    store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <8 x float> [[C]])
+// CHECK-NEXT:    store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(global v8f* out, v2i a, v2i b, v8f c)
 {
   *out = __builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12(a, b, c);
 }
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <8 x float> [[C]])
-// CHECK-GFX1200-NEXT:    store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <8 x float> [[C]])
+// CHECK-NEXT:    store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(global v8f* out, v2i a, v2i b, v8f c)
 {
   *out = __builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12(a, b, c);
 }
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <8 x float> [[C]])
-// CHECK-GFX1200-NEXT:    store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32(<2 x i32> [[A]], <2 x i32> [[B]], <8 x float> [[C]])
+// CHECK-NEXT:    store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(global v8f* out, v2i a, v2i b, v8f c)
 {
   *out = __builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12(a, b, c);
 }
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x32_iu4_w32(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 true, <2 x i32> [[A]], i1 true, <2 x i32> [[B]], <8 x i32> [[C]], i1 false)
-// CHECK-GFX1200-NEXT:    store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x32_iu4_w32(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 32)) [[OUT:%.*]], <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]], <8 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 true, <2 x i32> [[A]], i1 true, <2 x i32> [[B]], <8 x i32> [[C]], i1 false)
+// CHECK-NEXT:    store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 32, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_i32_16x16x32_iu4_w32(global v8i* out, v2i a, v2i b, v8i c)
 {
   *out = __builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12(true, a, true, b, c, false);
 }
 //.
-// CHECK-GFX1200: [[META6:![0-9]+]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0}
-// CHECK-GFX1200: [[META7]] = !{!"Simple C/C++ TBAA"}
-// CHECK-GFX1200: [[CHAR_TBAA8]] = !{[[META6]], [[META6]], i64 0}
+// CHECK: [[META6:![0-9]+]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0}
+// CHECK: [[META7]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[CHAR_TBAA8]] = !{[[META6]], [[META6]], i64 0}
 //.
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w64.cl
index a79c3d4da1ebb..98ce84adf1554 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w64.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w64.cl
@@ -1,6 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
 // REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1200
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1170 -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck %s
 
 typedef float  v4f   __attribute__((ext_vector_type(4)));
 typedef half   v4h   __attribute__((ext_vector_type(4)));
@@ -13,12 +14,12 @@ typedef int    v4i   __attribute__((ext_vector_type(4)));
 // amdgcn_wmma_f32_16x16x16_f16
 //
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_f16_w64(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16(<4 x half> [[A]], <4 x half> [[B]], <4 x float> [[C]])
-// CHECK-GFX1200-NEXT:    store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8:![0-9]+]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_f16_w64(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16(<4 x half> [[A]], <4 x half> [[B]], <4 x float> [[C]])
+// CHECK-NEXT:    store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8:![0-9]+]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_f32_16x16x16_f16_w64(global v4f* out, v4h a, v4h b, v4f c)
 {
@@ -29,12 +30,12 @@ void test_amdgcn_wmma_f32_16x16x16_f16_w64(global v4f* out, v4h a, v4h b, v4f c)
 // amdgcn_wmma_f32_16x16x16_bf16
 //
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf16_w64(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4i16(<4 x i16> [[A]], <4 x i16> [[B]], <4 x float> [[C]])
-// CHECK-GFX1200-NEXT:    store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf16_w64(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4i16(<4 x i16> [[A]], <4 x i16> [[B]], <4 x float> [[C]])
+// CHECK-NEXT:    store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_f32_16x16x16_bf16_w64(global v4f* out, v4s a, v4s b, v4f c)
 {
@@ -45,12 +46,12 @@ void test_amdgcn_wmma_f32_16x16x16_bf16_w64(global v4f* out, v4s a, v4s b, v4f c
 // amdgcn_wmma_f16_16x16x16_f16
 //
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f16_16x16x16_f16_w64(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v4f16.v4f16(<4 x half> [[A]], <4 x half> [[B]], <4 x half> [[C]], i1 false)
-// CHECK-GFX1200-NEXT:    store <4 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA8]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_f16_16x16x16_f16_w64(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v4f16.v4f16(<4 x half> [[A]], <4 x half> [[B]], <4 x half> [[C]], i1 false)
+// CHECK-NEXT:    store <4 x half> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_f16_16x16x16_f16_w64(global v4h* out, v4h a, v4h b, v4h c)
 {
@@ -61,12 +62,12 @@ void test_amdgcn_wmma_f16_16x16x16_f16_w64(global v4h* out, v4h a, v4h b, v4h c)
 // amdgcn_wmma_bf16_16x16x16_bf16
 //
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_bf16_16x16x16_bf16_w64(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]], <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <4 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v4i16.v4i16(<4 x i16> [[A]], <4 x i16> [[B]], <4 x i16> [[C]], i1 false)
-// CHECK-GFX1200-NEXT:    store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA8]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_bf16_16x16x16_bf16_w64(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]], <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]], <4 x i16> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v4i16.v4i16(<4 x i16> [[A]], <4 x i16> [[B]], <4 x i16> [[C]], i1 false)
+// CHECK-NEXT:    store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_bf16_16x16x16_bf16_w64(global v4s* out, v4s a, v4s b, v4s c)
 {
@@ -77,12 +78,12 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_w64(global v4s* out, v4s a, v4s b, v4s
 // amdgcn_wmma_i32_16x16x16_iu8
 //
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu8_w64(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <4 x i32> [[C]], i1 false)
-// CHECK-GFX1200-NEXT:    store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu8_w64(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <4 x i32> [[C]], i1 false)
+// CHECK-NEXT:    store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_i32_16x16x16_iu8_w64(global v4i* out, int a, int b, v4i c)
 {
@@ -93,79 +94,79 @@ void test_amdgcn_wmma_i32_16x16x16_iu8_w64(global v4i* out, int a, int b, v4i c)
 // amdgcn_wmma_i32_16x16x16_iu4
 //
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu4_w64(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <4 x i32> [[C]], i1 false)
-// CHECK-GFX1200-NEXT:    store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x16_iu4_w64(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <4 x i32> [[C]], i1 false)
+// CHECK-NEXT:    store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_i32_16x16x16_iu4_w64(global v4i* out, int a, int b, v4i c)
 {
   *out = __builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12(true, a, true, b, c, false);
 }
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32(i32 [[A]], i32 [[B]], <4 x float> [[C]])
-// CHECK-GFX1200-NEXT:    store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32(i32 [[A]], i32 [[B]], <4 x float> [[C]])
+// CHECK-NEXT:    store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(global v4f* out, int a, int b, v4f c)
 {
   *out = __builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12(a, b, c);
 }
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32(i32 [[A]], i32 [[B]], <4 x float> [[C]])
-// CHECK-GFX1200-NEXT:    store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32(i32 [[A]], i32 [[B]], <4 x float> [[C]])
+// CHECK-NEXT:    store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(global v4f* out, int a, int b, v4f c)
 {
   *out = __builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12(a, b, c);
 }
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32(i32 [[A]], i32 [[B]], <4 x float> [[C]])
-// CHECK-GFX1200-NEXT:    store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32(i32 [[A]], i32 [[B]], <4 x float> [[C]])
+// CHECK-NEXT:    store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(global v4f* out, int a, int b, v4f c)
 {
   *out = __builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12(a, b, c);
 }
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32(i32 [[A]], i32 [[B]], <4 x float> [[C]])
-// CHECK-GFX1200-NEXT:    store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x float> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32(i32 [[A]], i32 [[B]], <4 x float> [[C]])
+// CHECK-NEXT:    store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(global v4f* out, int a, int b, v4f c)
 {
   *out = __builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12(a, b, c);
 }
 
-// CHECK-GFX1200-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x32_iu4_w32(
-// CHECK-GFX1200-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-GFX1200-NEXT:  [[ENTRY:.*:]]
-// CHECK-GFX1200-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <4 x i32> [[C]], i1 false)
-// CHECK-GFX1200-NEXT:    store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]]
-// CHECK-GFX1200-NEXT:    ret void
+// CHECK-LABEL: define dso_local void @test_amdgcn_wmma_i32_16x16x32_iu4_w32(
+// CHECK-SAME: ptr addrspace(1) noundef writeonly captures(none) initializes((0, 16)) [[OUT:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 true, i32 [[A]], i1 true, i32 [[B]], <4 x i32> [[C]], i1 false)
+// CHECK-NEXT:    store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT]], align 16, !tbaa [[CHAR_TBAA8]]
+// CHECK-NEXT:    ret void
 //
 void test_amdgcn_wmma_i32_16x16x32_iu4_w32(global v4i* out, int a, int b, v4i c)
 {
   *out = __builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12(true, a, true, b, c, false);
 }
 //.
-// CHECK-GFX1200: [[META6:![0-9]+]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0}
-// CHECK-GFX1200: [[META7]] = !{!"Simple C/C++ TBAA"}
-// CHECK-GFX1200: [[CHAR_TBAA8]] = !{[[META6]], [[META6]], i64 0}
+// CHECK: [[META6:![0-9]+]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0}
+// CHECK: [[META7]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[CHAR_TBAA8]] = !{[[META6]], [[META6]], i64 0}
 //.
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w32-gfx10-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w32-gfx10-err.cl
index a1a56f0d8417d..ed72a8ee7dbd2 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w32-gfx10-err.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w32-gfx10-err.cl
@@ -21,14 +21,14 @@ void test_amdgcn_wmma_f32_16x16x16_bf16_w32(global v8f* out8f, v16s a16s, v16s b
                                             global v16s* out16s, v2i a2i, v2i b2i, v16s c16s,
                                             global v8i* out8i, v4i a4i, v4i b4i, v8i c8i)
 {
- *out8f = __builtin_amdgcn_wmma_f32_16x16x16_f16_w32(a16h, b16h, c8f);  // expected-error{{'__builtin_amdgcn_wmma_f32_16x16x16_f16_w32' needs target feature gfx11-insts,wavefrontsize32}}
- *out8f = __builtin_amdgcn_wmma_f32_16x16x16_bf16_w32(a16s, b16s, c8f);  // expected-error{{'__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32' needs target feature gfx11-insts,wavefrontsize32}}
- *out16h = __builtin_amdgcn_wmma_f16_16x16x16_f16_w32(a16h, b16h, c16h, true); // expected-error{{'__builtin_amdgcn_wmma_f16_16x16x16_f16_w32' needs target feature gfx11-insts,wavefrontsize32}}
- *out16s = __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32(a16s, b16s, c16s, true); // expected-error{{'__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32' needs target feature gfx11-insts,wavefrontsize32}}
- *out16h = __builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32(a16h, b16h, c16h, true); // expected-error{{'__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32' needs target feature gfx11-insts,wavefrontsize32}}
- *out16s = __builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32(a16s, b16s, c16s, true); // expected-error{{'__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32' needs target feature gfx11-insts,wavefrontsize32}}
- *out8i = __builtin_amdgcn_wmma_i32_16x16x16_iu8_w32(true, a4i, true, b4i, c8i, false); // expected-error{{'__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32' needs target feature gfx11-insts,wavefrontsize32}}
- *out8i = __builtin_amdgcn_wmma_i32_16x16x16_iu4_w32(true, a2i, true, b2i, c8i, false); // expected-error{{'__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32' needs target feature gfx11-insts,wavefrontsize32}}
+ *out8f = __builtin_amdgcn_wmma_f32_16x16x16_f16_w32(a16h, b16h, c8f);  // expected-error{{'__builtin_amdgcn_wmma_f32_16x16x16_f16_w32' needs target feature wmma-256b-insts,wavefrontsize32}}
+ *out8f = __builtin_amdgcn_wmma_f32_16x16x16_bf16_w32(a16s, b16s, c8f);  // expected-error{{'__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32' needs target feature wmma-256b-insts,wavefrontsize32}}
+ *out16h = __builtin_amdgcn_wmma_f16_16x16x16_f16_w32(a16h, b16h, c16h, true); // expected-error{{'__builtin_amdgcn_wmma_f16_16x16x16_f16_w32' needs target feature wmma-256b-insts,wavefrontsize32}}
+ *out16s = __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32(a16s, b16s, c16s, true); // expected-error{{'__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32' needs target feature wmma-256b-insts,wavefrontsize32}}
+ *out16h = __builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32(a16h, b16h, c16h, true); // expected-error{{'__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32' needs target feature wmma-256b-insts,wavefrontsize32}}
+ *out16s = __builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32(a16s, b16s, c16s, true); // expected-error{{'__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32' needs target feature wmma-256b-insts,wavefrontsize32}}
+ *out8i = __builtin_amdgcn_wmma_i32_16x16x16_iu8_w32(true, a4i, true, b4i, c8i, false); // expected-error{{'__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32' needs target feature wmma-256b-insts,wavefrontsize32}}
+ *out8i = __builtin_amdgcn_wmma_i32_16x16x16_iu4_w32(true, a2i, true, b2i, c8i, false); // expected-error{{'__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32' needs target feature wmma-256b-insts,wavefrontsize32}}
 }
 
 #endif
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w64-gfx10-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w64-gfx10-err.cl
index d995b1dc46be7..4b1808fe6d6e6 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w64-gfx10-err.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wmma-w64-gfx10-err.cl
@@ -21,14 +21,14 @@ void test_amdgcn_wmma_f32_16x16x16_bf16_w64(global v4f* out4f, v16h a16h, v16h b
                                             global v8s* out8s, v4i a4i, v4i b4i, v8s c8s,
                                             global v4i* out4i, v2i a2i, v2i b2i, v4i c4i)
 {
- *out4f = __builtin_amdgcn_wmma_f32_16x16x16_f16_w64(a16h, b16h, c4f);  // expected-error{{'__builtin_amdgcn_wmma_f32_16x16x16_f16_w64' needs target feature gfx11-insts,wavefrontsize64}}
- *out4f = __builtin_amdgcn_wmma_f32_16x16x16_bf16_w64(a16s, b16s, c4f);  // expected-error{{'__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64' needs target feature gfx11-insts,wavefrontsize64}}
- *out8h = __builtin_amdgcn_wmma_f16_16x16x16_f16_w64(a16h, b16h, c8h, true); // expected-error{{'__builtin_amdgcn_wmma_f16_16x16x16_f16_w64' needs target feature gfx11-insts,wavefrontsize64}}
- *out8s = __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64(a16s, b16s, c8s, true); // expected-error{{'__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64' needs target feature gfx11-insts,wavefrontsize64}}
- *out8h = __builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64(a16h, b16h, c8h, true); // expected-error{{'__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64' needs target feature gfx11-insts,wavefrontsize64}}
- *out8s = __builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64(a16s, b16s, c8s, true); // expected-error{{'__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64' needs target feature gfx11-insts,wavefrontsize64}}
- *out4i = __builtin_amdgcn_wmma_i32_16x16x16_iu8_w64(true, a4i, true, b4i, c4i, false); // expected-error{{'__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64' needs target feature gfx11-insts,wavefrontsize64}}
- *out4i = __builtin_amdgcn_wmma_i32_16x16x16_iu4_w64(true, a2i, true, b2i, c4i, false); // expected-error{{'__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64' needs target feature gfx11-insts,wavefrontsize64}}
+ *out4f = __builtin_amdgcn_wmma_f32_16x16x16_f16_w64(a16h, b16h, c4f);  // expected-error{{'__builtin_amdgcn_wmma_f32_16x16x16_f16_w64' needs target feature wmma-256b-insts,wavefrontsize64}}
+ *out4f = __builtin_amdgcn_wmma_f32_16x16x16_bf16_w64(a16s, b16s, c4f);  // expected-error{{'__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64' needs target feature wmma-256b-insts,wavefrontsize64}}
+ *out8h = __builtin_amdgcn_wmma_f16_16x16x16_f16_w64(a16h, b16h, c8h, true); // expected-error{{'__builtin_amdgcn_wmma_f16_16x16x16_f16_w64' needs target feature wmma-256b-insts,wavefrontsize64}}
+ *out8s = __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64(a16s, b16s, c8s, true); // expected-error{{'__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64' needs target feature wmma-256b-insts,wavefrontsize64}}
+ *out8h = __builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64(a16h, b16h, c8h, true); // expected-error{{'__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64' needs target feature wmma-256b-insts,wavefrontsize64}}
+ *out8s = __builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64(a16s, b16s, c8s, true); // expected-error{{'__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64' needs target feature wmma-256b-insts,wavefrontsize64}}
+ *out4i = __builtin_amdgcn_wmma_i32_16x16x16_iu8_w64(true, a4i, true, b4i, c4i, false); // expected-error{{'__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64' needs target feature wmma-256b-insts,wavefrontsize64}}
+ *out4i = __builtin_amdgcn_wmma_i32_16x16x16_iu4_w64(true, a2i, true, b2i, c4i, false); // expected-error{{'__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64' needs target feature wmma-256b-insts,wavefrontsize64}}
 }
 
 #endif
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 723b064afd64e..e97205295702e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -775,6 +775,14 @@ defm CvtFP8VOP1Bug : AMDGPUSubtargetFeature<"cvt-fp8-vop1-bug",
   [FeatureFP8ConversionInsts]
 >;
 
+defm WMMA256bInsts : AMDGPUSubtargetFeature<"wmma-256b-insts",
+  "Has WMMA instructions where A and B matrices have duplicated data"
+>;
+
+defm WMMA128bInsts : AMDGPUSubtargetFeature<"wmma-128b-insts",
+  "Has WMMA instructions where A and B matrices do not have duplicated data"
+>;
+
 defm PkFmacF16Inst : AMDGPUSubtargetFeature<"pk-fmac-f16-inst",
   "Has v_pk_fmac_f16 instruction"
 >;
@@ -1821,9 +1829,9 @@ def FeatureISAVersion11_Common : FeatureSet<
    FeatureD16Writes32BitVgpr,
 ]>;
 
-// There are few workarounds that need to be
-// added to all targets. This pessimizes codegen
-// a bit on the generic GFX11 target.
+// There are a few workarounds that need to be added to all targets. This
+// pessimizes codegen a bit on the generic GFX11 target. This generic target
+// does not include GFX1170 due to incompatible changes.
 def FeatureISAVersion11_Generic: FeatureSet<
   !listconcat(FeatureISAVersion11_Common.Features,
     [FeatureMSAALoadDstSelBug,
@@ -1832,14 +1840,16 @@ def FeatureISAVersion11_Generic: FeatureSet<
      FeatureMADIntraFwdBug,
      FeaturePrivEnabledTrap2NopBug,
      FeatureRequiresCOV6,
-     FeatureRequiredExportPriority])>;
+     FeatureRequiredExportPriority,
+     FeatureWMMA256bInsts])>;
 
 def FeatureISAVersion11_0_Common : FeatureSet<
   !listconcat(FeatureISAVersion11_Common.Features,
     [FeatureMSAALoadDstSelBug,
      FeatureVALUTransUseHazard,
      FeatureMADIntraFwdBug,
-     FeaturePrivEnabledTrap2NopBug])>;
+     FeaturePrivEnabledTrap2NopBug,
+     FeatureWMMA256bInsts])>;
 
 def FeatureISAVersion11_0_0 : FeatureSet<
   !listconcat(FeatureISAVersion11_0_Common.Features,
@@ -1862,7 +1872,8 @@ def FeatureISAVersion11_5_Common : FeatureSet<
   !listconcat(FeatureISAVersion11_Common.Features,
     [FeatureSALUFloatInsts,
      FeatureDPPSrc1SGPR,
-     FeatureRequiredExportPriority])>;
+     FeatureRequiredExportPriority,
+     FeatureWMMA256bInsts])>;
 
 def FeatureISAVersion11_5_0 : FeatureSet<
   !listconcat(FeatureISAVersion11_5_Common.Features,
@@ -1886,7 +1897,8 @@ def FeatureISAVersion11_7_0 : FeatureSet<
     [FeatureSALUFloatInsts,
      FeatureDPPSrc1SGPR,
      FeatureFP8ConversionInsts,
-     FeatureDot11Insts])>;
+     FeatureDot11Insts,
+     FeatureWMMA128bInsts])>;
 
 def FeatureISAVersion12 : FeatureSet<
   [FeatureGFX12,
@@ -1916,6 +1928,7 @@ def FeatureISAVersion12 : FeatureSet<
    FeatureImageInsts,
    FeatureExtendedImageInsts,
    FeatureFP8ConversionInsts,
+   FeatureWMMA128bInsts,
    FeatureIEEEMinimumMaximumInsts,
    FeaturePackedTID,
    FeatureVcmpxPermlaneHazard,
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 7729d27c6311b..435f65581500e 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1554,6 +1554,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
     return AMDGPU::isGFX11Plus(getSTI());
   }
 
+  bool isGFX1170() const { return AMDGPU::isGFX1170(getSTI()); }
+
   bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
 
   bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index b2dfd098735a0..2309a56f612f1 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -686,11 +686,19 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                         Address, CS))
         break;
 
+      if (isGFX1170() &&
+          tryDecodeInst(DecoderTableGFX117064, MI, QW, Address, CS))
+        break;
+
       if (isGFX11() &&
           tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
                         Address, CS))
         break;
 
+      if (isGFX1170() &&
+          tryDecodeInst(DecoderTableGFX1170W6464, MI, QW, Address, CS))
+        break;
+
       if (isGFX11() &&
           tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
         break;
@@ -2247,6 +2255,8 @@ bool AMDGPUDisassembler::isGFX11Plus() const {
   return AMDGPU::isGFX11Plus(STI);
 }
 
+bool AMDGPUDisassembler::isGFX1170() const { return AMDGPU::isGFX1170(STI); }
+
 bool AMDGPUDisassembler::isGFX12() const {
   return STI.hasFeature(AMDGPU::FeatureGFX12);
 }
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 28f71d8d7556b..b01eb8dd59fad 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -178,6 +178,7 @@ class AMDGPUDisassembler : public MCDisassembler {
   bool isGFX10() const;
   bool isGFX10Plus() const;
   bool isGFX11() const;
+  bool isGFX1170() const;
   bool isGFX11Plus() const;
   bool isGFX12() const;
   bool isGFX12Plus() const;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index b308e0d77305f..2365b6175a46f 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -396,6 +396,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
     return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
   }
 
+  bool isGFX1170() const {
+    return getGeneration() == GFX11 && hasWMMA128bInsts();
+  }
+
   bool hasMad64_32() const { return getGeneration() >= SEA_ISLANDS; }
 
   bool hasAtomicFaddInsts() const {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index d38e08d6c79a7..38a7c1785f404 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -44,9 +44,10 @@ class GFXGen<Predicate pred, string dn, string suffix, int sub> {
 def GFX13Gen         : GFXGen<isGFX13Only, "GFX13", "_gfx13", SIEncodingFamily.GFX13>;
 def GFX1250Gen       : GFXGen<isGFX125xOnly, "GFX1250", "_gfx1250", SIEncodingFamily.GFX1250>;
 def GFX12Not12_50Gen : GFXGen<isGFX12Not12_50, "GFX12", "_gfx12", SIEncodingFamily.GFX12>;
-def GFX12Gen : GFXGen<isGFX12Only, "GFX12", "_gfx12", SIEncodingFamily.GFX12>;
-def GFX11Gen : GFXGen<isGFX11Only, "GFX11", "_gfx11", SIEncodingFamily.GFX11>;
-def GFX10Gen : GFXGen<isGFX10Only, "GFX10", "_gfx10", SIEncodingFamily.GFX10>;
+def GFX12Gen         : GFXGen<isGFX12Only, "GFX12", "_gfx12", SIEncodingFamily.GFX12>;
+def GFX1170Gen       : GFXGen<isGFX11Only, "GFX1170", "_gfx1170", SIEncodingFamily.GFX11>;
+def GFX11Gen         : GFXGen<isGFX11Only, "GFX11", "_gfx11", SIEncodingFamily.GFX11>;
+def GFX10Gen         : GFXGen<isGFX10Only, "GFX10", "_gfx10", SIEncodingFamily.GFX10>;
 
 //===----------------------------------------------------------------------===//
 // SI DAG Nodes
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index f12bd6158c1be..2711b3044a6b5 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2598,6 +2598,10 @@ bool isGFX11(const MCSubtargetInfo &STI) {
   return STI.hasFeature(AMDGPU::FeatureGFX11);
 }
 
+bool isGFX1170(const MCSubtargetInfo &STI) {
+  return isGFX11(STI) && STI.hasFeature(AMDGPU::FeatureWMMA128bInsts);
+}
+
 bool isGFX11Plus(const MCSubtargetInfo &STI) {
   return isGFX11(STI) || isGFX12Plus(STI);
 }
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index dc44ab134f108..ccf3e40ab1cef 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1705,6 +1705,7 @@ bool isGFX10Plus(const MCSubtargetInfo &STI);
 bool isNotGFX10Plus(const MCSubtargetInfo &STI);
 bool isGFX10Before1030(const MCSubtargetInfo &STI);
 bool isGFX11(const MCSubtargetInfo &STI);
+bool isGFX1170(const MCSubtargetInfo &STI);
 bool isGFX11Plus(const MCSubtargetInfo &STI);
 bool isGFX12(const MCSubtargetInfo &STI);
 bool isGFX12Plus(const MCSubtargetInfo &STI);
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 063546e1a5bc2..b8cdb2460a166 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -1426,22 +1426,18 @@ multiclass WMMAInst<string Suffix, string Instr, VOPProfile P, SDPatternOperator
   defvar WMMAConstraints3Addr = "@earlyclobber $vdst";
 
   defvar WMMAProfile = VOPProfileWMMA<P, Suffix, _Src01RC64, Type.hasClamp, Type.hasOpsel>;
-  let isConvergent = 1, Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
-    let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = convertibleTo3Addr in {
-      def _twoaddr # Suffix : VOP3P_Pseudo<Instr # Suffix, WMMAProfile>;
-    }
-  }
-  if convertibleTo3Addr then {
+
+  let SubtargetPredicate = HasWMMA256bInsts in {
     let isConvergent = 1, Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
-      let Constraints = WMMAConstraints3Addr, SchedRW = [Write32Bit, Write32Bit] in {
-        def _threeaddr # Suffix : VOP3P_Pseudo<Instr # Suffix, WMMAProfile>;
+      let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = convertibleTo3Addr in {
+        def _twoaddr # Suffix : VOP3P_Pseudo<Instr # Suffix, WMMAProfile>;
+      }
+      if convertibleTo3Addr then {
+        let Constraints = WMMAConstraints3Addr, SchedRW = [Write32Bit, Write32Bit] in {
+          def _threeaddr # Suffix : VOP3P_Pseudo<Instr # Suffix, WMMAProfile>;
+        }
       }
     }
-    def : WMMAOpcodeMapping<!cast<Instruction>(NAME # _twoaddr # Suffix),
-                          !cast<Instruction>(NAME # _threeaddr # Suffix)>;
-  }
-
-  let SubtargetPredicate = isGFX11Only in {
     if !eq(Type, WMMAOpSel) then {
       def : WMMAOpSelPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>;
     } else if !eq(Type, WMMAUIClamp) then {
@@ -1450,6 +1446,11 @@ multiclass WMMAInst<string Suffix, string Instr, VOPProfile P, SDPatternOperator
       def : WMMARegularPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>;
     }
   }
+
+  if convertibleTo3Addr then {
+    def : WMMAOpcodeMapping<!cast<Instruction>(NAME # _twoaddr # Suffix),
+                            !cast<Instruction>(NAME # _threeaddr # Suffix)>;
+  }
 }
 
 
@@ -1731,7 +1732,7 @@ multiclass WMMAInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string Pse
   defvar WMMAConstraints2Addr = !if(DiffVdstSrc2, "@earlyclobber $vdst", "@earlyclobber $vdst,$vdst = $src2");
   defvar WMMAConstraints3Addr = "@earlyclobber $vdst";
 
-  let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0, isConvergent = 1 in {
+  let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0, isConvergent = 1, SubtargetPredicate = HasWMMA128bInsts in {
     let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = 1 in
       def _twoaddr : VOP3P_Pseudo<Instr, WMMAProfile>, WMMAInstInfo {
         let PseudoInstr = Instr#PseudoInstrSuffix;
@@ -2051,7 +2052,7 @@ class SWMMACPat_w64<Instruction Inst, SDPatternOperator node, VOP3PWMMA_Profile
             let WaveSizePredicate = isWave64;
           }
 
-let WaveSizePredicate = isWave32, SubtargetPredicate = isGFX12PlusNot12_50 in {
+let WaveSizePredicate = isWave32, SubtargetPredicate = isGFX11PlusNot12_50, OtherPredicates = [HasWMMA128bInsts] in {
   defm : WMMAPat<"V_WMMA_F32_16X16X16_F16_w32",     int_amdgcn_wmma_f32_16x16x16_f16,     F32_F16_WMMA_w32>;
   defm : WMMAPat<"V_WMMA_F32_16X16X16_BF16_w32",    int_amdgcn_wmma_f32_16x16x16_bf16,    F32_BF16_WMMA_w32>;
   defm : WMMAPat<"V_WMMA_F16_16X16X16_F16_w32",     int_amdgcn_wmma_f16_16x16x16_f16,     F16_F16_WMMA_w32,1>;
@@ -2078,7 +2079,7 @@ let WaveSizePredicate = isWave32, SubtargetPredicate = isGFX12PlusNot12_50 in {
   def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF8_BF8_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf8_bf8, F32_FP8BF8_SWMMAC_w32>;
 }
 
-let WaveSizePredicate = isWave64, SubtargetPredicate = isGFX12PlusNot12_50 in {
+let WaveSizePredicate = isWave64, SubtargetPredicate = isGFX11PlusNot12_50, OtherPredicates = [HasWMMA128bInsts] in {
   defm : WMMAPat<"V_WMMA_F32_16X16X16_F16_w64",     int_amdgcn_wmma_f32_16x16x16_f16,     F32_F16_WMMA_w64>;
   defm : WMMAPat<"V_WMMA_F32_16X16X16_BF16_w64",    int_amdgcn_wmma_f32_16x16x16_bf16,    F32_BF16_WMMA_w64>;
   defm : WMMAPat<"V_WMMA_F16_16X16X16_F16_w64",     int_amdgcn_wmma_f16_16x16x16_f16,     F16_F16_WMMA_w64,1>;
@@ -2233,6 +2234,18 @@ multiclass VOP3P_WMMA_Real_Base<GFXGen Gen, bits<8> op, VOP3PWMMA_Profile WMMAP,
     VOP3PeWmma<op, !cast<VOP3P_Pseudo>(backing_ps_name).Pfl, WMMAP>;
 }
 
+multiclass VOP3P_Real_WMMA_gfx1170 <bits<8> op, VOP3PWMMA_Profile WMMAP> {
+  let WaveSizePredicate = isWave32, DecoderNamespace = "GFX1170" in {
+    defm _twoaddr : VOP3P_WMMA_Real_Base <GFX1170Gen, op, WMMAP>;
+  }
+}
+
+multiclass VOP3P_Real_WMMA_gfx1170w64 <bits<8> op, VOP3PWMMA_Profile WMMAP> {
+  let WaveSizePredicate = isWave64, DecoderNamespace = "GFX1170W64" in {
+    defm _twoaddr : VOP3P_WMMA_Real_Base <GFX1170Gen, op, WMMAP>;
+  }
+}
+
 multiclass VOP3P_Real_WMMA_gfx12 <bits<8> op, VOP3PWMMA_Profile WMMAP> {
   let WaveSizePredicate = isWave32, DecoderNamespace = "GFX12" in {
     defm _twoaddr : VOP3P_WMMA_Real_Base <GFX12Gen, op, WMMAP>;
@@ -2245,6 +2258,14 @@ multiclass VOP3P_Real_WMMA_gfx12w64 <bits<8> op, VOP3PWMMA_Profile WMMAP> {
   }
 }
 
+multiclass VOP3P_Real_WMMA_gfx1170_gfx12 <bits<8> op, VOP3PWMMA_Profile WMMAP> :
+  VOP3P_Real_WMMA_gfx1170<op, WMMAP>,
+  VOP3P_Real_WMMA_gfx12<op, WMMAP>;
+
+multiclass VOP3P_Real_WMMA_gfx1170_gfx12w64 <bits<8> op, VOP3PWMMA_Profile WMMAP> :
+  VOP3P_Real_WMMA_gfx1170w64<op, WMMAP>,
+  VOP3P_Real_WMMA_gfx12w64<op, WMMAP>;
+
 multiclass VOP3P_Real_WMMA_gfx1250 <bits<8> op, VOP3PWMMA_Profile WMMAP> {
   let WaveSizePredicate = isWave32, DecoderNamespace = "GFX12" in {
     defm _twoaddr : VOP3P_WMMA_Real_Base <GFX1250Gen, op, WMMAP>;
@@ -2349,54 +2370,53 @@ multiclass VOP3PX2_Real_ScaledWMMA_SrcFormats<string Gen, bits<8> op, bits<8> Ld
   }
 }
 
-defm V_WMMA_F32_16X16X16_F16_w32     : VOP3P_Real_WMMA_gfx12 <0x040, F32_F16_WMMA_w32>;
-defm V_WMMA_F32_16X16X16_BF16_w32    : VOP3P_Real_WMMA_gfx12 <0x041, F32_BF16_WMMA_w32>;
-defm V_WMMA_F16_16X16X16_F16_w32     : VOP3P_Real_WMMA_gfx12 <0x042, F16_F16_WMMA_w32>;
-defm V_WMMA_BF16_16X16X16_BF16_w32   : VOP3P_Real_WMMA_gfx12 <0x043, BF16_BF16_WMMA_w32>;
-defm V_WMMA_I32_16X16X16_IU8_w32     : VOP3P_Real_WMMA_gfx12 <0x044, I32_IU8_WMMA_w32>;
-defm V_WMMA_I32_16X16X16_IU4_w32     : VOP3P_Real_WMMA_gfx12 <0x045, I32_IU4X16_WMMA_w32>;
-defm V_WMMA_F32_16X16X16_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx12 <0x046, F32_FP8BF8_WMMA_w32>;
-defm V_WMMA_F32_16X16X16_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx12 <0x047, F32_FP8BF8_WMMA_w32>;
-defm V_WMMA_F32_16X16X16_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx12 <0x048, F32_FP8BF8_WMMA_w32>;
-defm V_WMMA_F32_16X16X16_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx12 <0x049, F32_FP8BF8_WMMA_w32>;
-defm V_WMMA_I32_16X16X32_IU4_w32     : VOP3P_Real_WMMA_gfx12 <0x04a, I32_IU4X32_WMMA_w32>;
-
-defm V_WMMA_F32_16X16X16_F16_w64     : VOP3P_Real_WMMA_gfx12w64 <0x040, F32_F16_WMMA_w64>;
-defm V_WMMA_F32_16X16X16_BF16_w64    : VOP3P_Real_WMMA_gfx12w64 <0x041, F32_BF16_WMMA_w64>;
-defm V_WMMA_F16_16X16X16_F16_w64     : VOP3P_Real_WMMA_gfx12w64 <0x042, F16_F16_WMMA_w64>;
-defm V_WMMA_BF16_16X16X16_BF16_w64   : VOP3P_Real_WMMA_gfx12w64 <0x043, BF16_BF16_WMMA_w64>;
-defm V_WMMA_I32_16X16X16_IU8_w64     : VOP3P_Real_WMMA_gfx12w64 <0x044, I32_IU8_WMMA_w64>;
-defm V_WMMA_I32_16X16X16_IU4_w64     : VOP3P_Real_WMMA_gfx12w64 <0x045, I32_IU4X16_WMMA_w64>;
-defm V_WMMA_F32_16X16X16_FP8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x046, F32_FP8BF8_WMMA_w64>;
-defm V_WMMA_F32_16X16X16_FP8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x047, F32_FP8BF8_WMMA_w64>;
-defm V_WMMA_F32_16X16X16_BF8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x048, F32_FP8BF8_WMMA_w64>;
-defm V_WMMA_F32_16X16X16_BF8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x049, F32_FP8BF8_WMMA_w64>;
-defm V_WMMA_I32_16X16X32_IU4_w64     : VOP3P_Real_WMMA_gfx12w64 <0x04a, I32_IU4X32_WMMA_w64>;
-
-
-defm V_SWMMAC_F32_16X16X32_F16_w32     : VOP3P_Real_WMMA_gfx12 <0x050, F32_F16_SWMMAC_w32>;
-defm V_SWMMAC_F32_16X16X32_BF16_w32    : VOP3P_Real_WMMA_gfx12 <0x051, F32_BF16_SWMMAC_w32>;
-defm V_SWMMAC_F16_16X16X32_F16_w32     : VOP3P_Real_WMMA_gfx12 <0x052, F16_F16_SWMMAC_w32>;
-defm V_SWMMAC_BF16_16X16X32_BF16_w32   : VOP3P_Real_WMMA_gfx12 <0x053, BF16_BF16_SWMMAC_w32>;
-defm V_SWMMAC_I32_16X16X32_IU8_w32     : VOP3P_Real_WMMA_gfx12 <0x054, I32_IU8_SWMMAC_w32>;
-defm V_SWMMAC_I32_16X16X32_IU4_w32     : VOP3P_Real_WMMA_gfx12 <0x055, I32_IU4X32_SWMMAC_w32>;
-defm V_SWMMAC_I32_16X16X64_IU4_w32     : VOP3P_Real_WMMA_gfx12 <0x056, I32_IU4X64_SWMMAC_w32>;
-defm V_SWMMAC_F32_16X16X32_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx12 <0x057, F32_FP8BF8_SWMMAC_w32>;
-defm V_SWMMAC_F32_16X16X32_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx12 <0x058, F32_FP8BF8_SWMMAC_w32>;
-defm V_SWMMAC_F32_16X16X32_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx12 <0x059, F32_FP8BF8_SWMMAC_w32>;
-defm V_SWMMAC_F32_16X16X32_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx12 <0x05a, F32_FP8BF8_SWMMAC_w32>;
-
-defm V_SWMMAC_F32_16X16X32_F16_w64     : VOP3P_Real_WMMA_gfx12w64 <0x050, F32_F16_SWMMAC_w64>;
-defm V_SWMMAC_F32_16X16X32_BF16_w64    : VOP3P_Real_WMMA_gfx12w64 <0x051, F32_BF16_SWMMAC_w64>;
-defm V_SWMMAC_F16_16X16X32_F16_w64     : VOP3P_Real_WMMA_gfx12w64 <0x052, F16_F16_SWMMAC_w64>;
-defm V_SWMMAC_BF16_16X16X32_BF16_w64   : VOP3P_Real_WMMA_gfx12w64 <0x053, BF16_BF16_SWMMAC_w64>;
-defm V_SWMMAC_I32_16X16X32_IU8_w64     : VOP3P_Real_WMMA_gfx12w64 <0x054, I32_IU8_SWMMAC_w64>;
-defm V_SWMMAC_I32_16X16X32_IU4_w64     : VOP3P_Real_WMMA_gfx12w64 <0x055, I32_IU4X32_SWMMAC_w64>;
-defm V_SWMMAC_I32_16X16X64_IU4_w64     : VOP3P_Real_WMMA_gfx12w64 <0x056, I32_IU4X64_SWMMAC_w64>;
-defm V_SWMMAC_F32_16X16X32_FP8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x057, F32_FP8BF8_SWMMAC_w64>;
-defm V_SWMMAC_F32_16X16X32_FP8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x058, F32_FP8BF8_SWMMAC_w64>;
-defm V_SWMMAC_F32_16X16X32_BF8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x059, F32_FP8BF8_SWMMAC_w64>;
-defm V_SWMMAC_F32_16X16X32_BF8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x05a, F32_FP8BF8_SWMMAC_w64>;
+defm V_WMMA_F32_16X16X16_F16_w32     : VOP3P_Real_WMMA_gfx1170_gfx12 <0x040, F32_F16_WMMA_w32>;
+defm V_WMMA_F32_16X16X16_BF16_w32    : VOP3P_Real_WMMA_gfx1170_gfx12 <0x041, F32_BF16_WMMA_w32>;
+defm V_WMMA_F16_16X16X16_F16_w32     : VOP3P_Real_WMMA_gfx1170_gfx12 <0x042, F16_F16_WMMA_w32>;
+defm V_WMMA_BF16_16X16X16_BF16_w32   : VOP3P_Real_WMMA_gfx1170_gfx12 <0x043, BF16_BF16_WMMA_w32>;
+defm V_WMMA_I32_16X16X16_IU8_w32     : VOP3P_Real_WMMA_gfx1170_gfx12 <0x044, I32_IU8_WMMA_w32>;
+defm V_WMMA_I32_16X16X16_IU4_w32     : VOP3P_Real_WMMA_gfx1170_gfx12 <0x045, I32_IU4X16_WMMA_w32>;
+defm V_WMMA_F32_16X16X16_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx1170_gfx12 <0x046, F32_FP8BF8_WMMA_w32>;
+defm V_WMMA_F32_16X16X16_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx1170_gfx12 <0x047, F32_FP8BF8_WMMA_w32>;
+defm V_WMMA_F32_16X16X16_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx1170_gfx12 <0x048, F32_FP8BF8_WMMA_w32>;
+defm V_WMMA_F32_16X16X16_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx1170_gfx12 <0x049, F32_FP8BF8_WMMA_w32>;
+defm V_WMMA_I32_16X16X32_IU4_w32     : VOP3P_Real_WMMA_gfx1170_gfx12 <0x04a, I32_IU4X32_WMMA_w32>;
+
+defm V_WMMA_F32_16X16X16_F16_w64     : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x040, F32_F16_WMMA_w64>;
+defm V_WMMA_F32_16X16X16_BF16_w64    : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x041, F32_BF16_WMMA_w64>;
+defm V_WMMA_F16_16X16X16_F16_w64     : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x042, F16_F16_WMMA_w64>;
+defm V_WMMA_BF16_16X16X16_BF16_w64   : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x043, BF16_BF16_WMMA_w64>;
+defm V_WMMA_I32_16X16X16_IU8_w64     : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x044, I32_IU8_WMMA_w64>;
+defm V_WMMA_I32_16X16X16_IU4_w64     : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x045, I32_IU4X16_WMMA_w64>;
+defm V_WMMA_F32_16X16X16_FP8_FP8_w64 : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x046, F32_FP8BF8_WMMA_w64>;
+defm V_WMMA_F32_16X16X16_FP8_BF8_w64 : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x047, F32_FP8BF8_WMMA_w64>;
+defm V_WMMA_F32_16X16X16_BF8_FP8_w64 : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x048, F32_FP8BF8_WMMA_w64>;
+defm V_WMMA_F32_16X16X16_BF8_BF8_w64 : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x049, F32_FP8BF8_WMMA_w64>;
+defm V_WMMA_I32_16X16X32_IU4_w64     : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x04a, I32_IU4X32_WMMA_w64>;
+
+defm V_SWMMAC_F32_16X16X32_F16_w32     : VOP3P_Real_WMMA_gfx1170_gfx12 <0x050, F32_F16_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X32_BF16_w32    : VOP3P_Real_WMMA_gfx1170_gfx12 <0x051, F32_BF16_SWMMAC_w32>;
+defm V_SWMMAC_F16_16X16X32_F16_w32     : VOP3P_Real_WMMA_gfx1170_gfx12 <0x052, F16_F16_SWMMAC_w32>;
+defm V_SWMMAC_BF16_16X16X32_BF16_w32   : VOP3P_Real_WMMA_gfx1170_gfx12 <0x053, BF16_BF16_SWMMAC_w32>;
+defm V_SWMMAC_I32_16X16X32_IU8_w32     : VOP3P_Real_WMMA_gfx1170_gfx12 <0x054, I32_IU8_SWMMAC_w32>;
+defm V_SWMMAC_I32_16X16X32_IU4_w32     : VOP3P_Real_WMMA_gfx1170_gfx12 <0x055, I32_IU4X32_SWMMAC_w32>;
+defm V_SWMMAC_I32_16X16X64_IU4_w32     : VOP3P_Real_WMMA_gfx1170_gfx12 <0x056, I32_IU4X64_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X32_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx1170_gfx12 <0x057, F32_FP8BF8_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X32_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx1170_gfx12 <0x058, F32_FP8BF8_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X32_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx1170_gfx12 <0x059, F32_FP8BF8_SWMMAC_w32>;
+defm V_SWMMAC_F32_16X16X32_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx1170_gfx12 <0x05a, F32_FP8BF8_SWMMAC_w32>;
+
+defm V_SWMMAC_F32_16X16X32_F16_w64     : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x050, F32_F16_SWMMAC_w64>;
+defm V_SWMMAC_F32_16X16X32_BF16_w64    : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x051, F32_BF16_SWMMAC_w64>;
+defm V_SWMMAC_F16_16X16X32_F16_w64     : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x052, F16_F16_SWMMAC_w64>;
+defm V_SWMMAC_BF16_16X16X32_BF16_w64   : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x053, BF16_BF16_SWMMAC_w64>;
+defm V_SWMMAC_I32_16X16X32_IU8_w64     : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x054, I32_IU8_SWMMAC_w64>;
+defm V_SWMMAC_I32_16X16X32_IU4_w64     : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x055, I32_IU4X32_SWMMAC_w64>;
+defm V_SWMMAC_I32_16X16X64_IU4_w64     : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x056, I32_IU4X64_SWMMAC_w64>;
+defm V_SWMMAC_F32_16X16X32_FP8_FP8_w64 : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x057, F32_FP8BF8_SWMMAC_w64>;
+defm V_SWMMAC_F32_16X16X32_FP8_BF8_w64 : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x058, F32_FP8BF8_SWMMAC_w64>;
+defm V_SWMMAC_F32_16X16X32_BF8_FP8_w64 : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x059, F32_FP8BF8_SWMMAC_w64>;
+defm V_SWMMAC_F32_16X16X32_BF8_BF8_w64 : VOP3P_Real_WMMA_gfx1170_gfx12w64 <0x05a, F32_FP8BF8_SWMMAC_w64>;
 
 defm V_WMMA_F32_16X16X4_F32_w32       : VOP3P_Real_WMMA_gfx1250 <0x05d, F32_F32_WMMA_w32>;
 defm V_WMMA_F32_16X16X32_BF16_w32     : VOP3P_Real_WMMA_gfx1250 <0x062, F32_BF16X32_WMMA_w32>;
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index fc5d7519bdffe..d5f8b9b2c5729 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -515,13 +515,38 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
     Features["qsad-insts"] = true;
     Features["cvt-pknorm-vop2-insts"] = true;
     Features["fp8-conversion-insts"] = true;
+    Features["wmma-128b-insts"] = true;
     Features["atomic-fmin-fmax-global-f32"] = true;
     break;
   case GK_GFX1170:
-    // TODO-GFX1170: Update features map for gfx1170
+    Features["ci-insts"] = true;
+    Features["dot5-insts"] = true;
+    Features["dot7-insts"] = true;
+    Features["dot8-insts"] = true;
+    Features["dot9-insts"] = true;
+    Features["dot10-insts"] = true;
+    Features["dot12-insts"] = true;
+    Features["dl-insts"] = true;
+    Features["16-bit-insts"] = true;
+    Features["dpp"] = true;
+    Features["gfx8-insts"] = true;
+    Features["gfx9-insts"] = true;
+    Features["gfx10-insts"] = true;
+    Features["gfx10-3-insts"] = true;
+    Features["gfx11-insts"] = true;
+    Features["atomic-fadd-rtn-insts"] = true;
+    Features["image-insts"] = true;
+    Features["cube-insts"] = true;
+    Features["lerp-inst"] = true;
+    Features["sad-insts"] = true;
+    Features["qsad-insts"] = true;
+    Features["cvt-pknorm-vop2-insts"] = true;
+    Features["gws"] = true;
     Features["dot11-insts"] = true;
     Features["fp8-conversion-insts"] = true;
-    [[fallthrough]];
+    Features["wmma-128b-insts"] = true;
+    Features["atomic-fmin-fmax-global-f32"] = true;
+    break;
   case GK_GFX1153:
   case GK_GFX1152:
   case GK_GFX1151:
@@ -554,6 +579,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
     Features["qsad-insts"] = true;
     Features["cvt-pknorm-vop2-insts"] = true;
     Features["gws"] = true;
+    Features["wmma-256b-insts"] = true;
     Features["atomic-fmin-fmax-global-f32"] = true;
     break;
   case GK_GFX1036:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
index 9693d544d1535..450cd0701911a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
@@ -1,14 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck %s --check-prefix=GFX12
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 < %s | FileCheck %s --check-prefixes=GCN,GFX1170
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck %s --check-prefixes=GCN,GFX12
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negA(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negA:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[16:17], v[8:11], off
-; GFX12-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_negA:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[16:17], v[8:11], off
+; GCN-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.A = fneg <8 x half> %A
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f16.v8f32(<8 x half> %fneg.A, <8 x half> %B, <8 x float> %C)
@@ -17,13 +18,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negB(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negB:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[16:17], v[8:11], off
-; GFX12-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_negB:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[16:17], v[8:11], off
+; GCN-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.B = fneg <8 x half> %B
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f16.v8f32(<8 x half> %A, <8 x half> %fneg.B, <8 x float> %C)
@@ -32,13 +33,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negC(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[16:17], v[8:11], off
-; GFX12-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[16:17], v[8:11], off
+; GCN-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <8 x float> %C
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f16.v8f32(<8 x half> %A, <8 x half> %B, <8 x float> %fneg.C)
@@ -47,13 +48,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_absC(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[16:17], v[8:11], off
-; GFX12-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[16:17], v[8:11], off
+; GCN-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <8 x float> @llvm.fabs.v8f32(<8 x float> %C)
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f16.v8f32(<8 x half> %A, <8 x half> %B, <8 x float> %fabs.C)
@@ -62,13 +63,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf16_negC(<8 x i16> %A, <8 x i16> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf16_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[16:17], v[8:11], off
-; GFX12-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf16_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[16:17], v[8:11], off
+; GCN-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <8 x float> %C
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8i16.v8f32(<8 x i16> %A, <8 x i16> %B, <8 x float> %fneg.C)
@@ -77,13 +78,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf16_absC(<8 x i16> %A, <8 x i16> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf16_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[16:17], v[8:11], off
-; GFX12-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf16_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[16:17], v[8:11], off
+; GCN-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <8 x float> @llvm.fabs.v8f32(<8 x float> %C)
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8i16.v8f32(<8 x i16> %A, <8 x i16> %B, <8 x float> %fabs.C)
@@ -92,11 +93,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negA(<8 x half> %A, <8 x half> %B, <8 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negA:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_negA:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.A = fneg <8 x half> %A
   %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %fneg.A, <8 x half> %B, <8 x half> %C, i1 0)
@@ -105,11 +106,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negB(<8 x half> %A, <8 x half> %B, <8 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negB:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_negB:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.B = fneg <8 x half> %B
   %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %fneg.B, <8 x half> %C, i1 0)
@@ -118,11 +119,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC(<8 x half> %A, <8 x half> %B, <8 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <8 x half> %C
   %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> %fneg.C, i1 0)
@@ -131,11 +132,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_absC(<8 x half> %A, <8 x half> %B, <8 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <8 x half> @llvm.fabs.v8f16(<8 x half> %C)
   %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> %fabs.C, i1 0)
@@ -144,13 +145,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8_negC(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_fp8_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <8 x float> %C
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v2i32.v8f32(<2 x i32> %A, <2 x i32> %B, <8 x float> %fneg.C)
@@ -159,13 +160,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8_absC(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_fp8_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <8 x float> @llvm.fabs.v8f32(<8 x float> %C)
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v2i32.v8f32(<2 x i32> %A, <2 x i32> %B, <8 x float> %fabs.C)
@@ -174,13 +175,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8_negC(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_fp8_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <8 x float> %C
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v2i32.v8f32(<2 x i32> %A, <2 x i32> %B, <8 x float> %fneg.C)
@@ -189,13 +190,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8_absC(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_fp8_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <8 x float> @llvm.fabs.v8f32(<8 x float> %C)
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v2i32.v8f32(<2 x i32> %A, <2 x i32> %B, <8 x float> %fabs.C)
@@ -204,13 +205,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8_negC(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_bf8_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <8 x float> %C
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v2i32.v8f32(<2 x i32> %A, <2 x i32> %B, <8 x float> %fneg.C)
@@ -219,13 +220,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8_absC(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_bf8_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <8 x float> @llvm.fabs.v8f32(<8 x float> %C)
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v2i32.v8f32(<2 x i32> %A, <2 x i32> %B, <8 x float> %fabs.C)
@@ -234,13 +235,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8_negC(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_bf8_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <8 x float> %C
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v2i32.v8f32(<2 x i32> %A, <2 x i32> %B, <8 x float> %fneg.C)
@@ -249,13 +250,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8_absC(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_bf8_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <8 x float> @llvm.fabs.v8f32(<8 x float> %C)
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v2i32.v8f32(<2 x i32> %A, <2 x i32> %B, <8 x float> %fabs.C)
@@ -264,13 +265,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_negA(<8 x half> %A, <16 x half> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_negA:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[21:22], v[12:15], off
-; GFX12-NEXT:    global_store_b128 v[21:22], v[16:19], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_f16_negA:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[21:22], v[12:15], off
+; GCN-NEXT:    global_store_b128 v[21:22], v[16:19], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.A = fneg <8 x half> %A
   %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f16.v16f16.v8f32.i16(<8 x half> %fneg.A, <16 x half> %B, <8 x float> %C, i16 %Index)
@@ -279,13 +280,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_negB(<8 x half> %A, <16 x half> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_negB:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[21:22], v[12:15], off
-; GFX12-NEXT:    global_store_b128 v[21:22], v[16:19], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_f16_negB:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[21:22], v[12:15], off
+; GCN-NEXT:    global_store_b128 v[21:22], v[16:19], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.B = fneg <16 x half> %B
   %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f16.v16f16.v8f32.i16(<8 x half> %A, <16 x half> %fneg.B, <8 x float> %C, i16 %Index)
@@ -294,11 +295,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f16_16x16x32_f16_negA(<8 x half> %A, <16 x half> %B, <8 x half> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f16_16x16x32_f16_negA:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[17:18], v[12:15], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f16_16x16x32_f16_negA:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[17:18], v[12:15], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.A = fneg <8 x half> %A
   %res = call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v16f16.v8f16.i16(<8 x half> %fneg.A, <16 x half> %B, <8 x half> %C, i16 %Index)
@@ -307,11 +308,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f16_16x16x32_f16_negB(<8 x half> %A, <16 x half> %B, <8 x half> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f16_16x16x32_f16_negB:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[17:18], v[12:15], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f16_16x16x32_f16_negB:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[17:18], v[12:15], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.B = fneg <16 x half> %B
   %res = call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v16f16.v8f16.i16(<8 x half> %A, <16 x half> %fneg.B, <8 x half> %C, i16 %Index)
@@ -322,13 +323,13 @@ bb:
 ; both neg and abs patterns (wmma matrix C f32 or f16 )
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negabsC(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negabsC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[16:17], v[8:11], off
-; GFX12-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_negabsC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[16:17], v[8:11], off
+; GCN-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <8 x float> @llvm.fabs.v8f32(<8 x float> %C)
   %fneg.fabs.C = fneg <8 x float> %fabs.C
@@ -338,11 +339,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negabsC(<8 x half> %A, <8 x half> %B, <8 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negabsC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_negabsC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <8 x half> @llvm.fabs.v8f16(<8 x half> %C)
   %fneg.fabs.C = fneg <8 x half> %fabs.C
@@ -352,15 +353,15 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_neg_partial_fabsA(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_neg_partial_fabsA:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_and_b32_e32 v11, 0x7fffffff, v11
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[16:17], v[8:11], off
-; GFX12-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_neg_partial_fabsA:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_and_b32_e32 v11, 0x7fffffff, v11
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[16:17], v[8:11], off
+; GCN-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %el3 = extractelement <8 x float> %C, i32 3
   %el3.fabs = call float @llvm.fabs.f32(float %el3)
@@ -374,13 +375,13 @@ bb:
 ; A or B matrix modifier and constant in C
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negA_constantC(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negA_constantC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[10:17], v[0:3], v[4:7], 1.0 neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX12-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_negA_constantC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[10:17], v[0:3], v[4:7], 1.0 neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GCN-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.A = fneg <8 x half> %A
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f16.v8f32(<8 x half> %fneg.A, <8 x half> %B, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>)
@@ -389,11 +390,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negB_constantC(<8 x half> %A, <8 x half> %B, <8 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negB_constantC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[10:13], v[0:3], v[4:7], 1.0 neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_negB_constantC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[10:13], v[0:3], v[4:7], 1.0 neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.B = fneg <8 x half> %B
   %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %fneg.B, <8 x half> <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>, i1 0)
@@ -404,6 +405,27 @@ bb:
 ; pack f16 elements with v_perm_b32 since they don't come from same b32
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<8 x half> %A, <8 x half> %B, ptr %Caddr, ptr addrspace(1) %out) {
+; GFX1170-LABEL: test_wmma_f16_16x16x16_f16_negC_pack:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    flat_load_b128 v[12:15], v[8:9]
+; GFX1170-NEXT:    flat_load_b128 v[16:19], v[8:9] offset:16
+; GFX1170-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
+; GFX1170-NEXT:    v_and_b32_e32 v8, 0xffff, v12
+; GFX1170-NEXT:    v_and_b32_e32 v9, 0xffff, v14
+; GFX1170-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX1170-NEXT:    v_and_b32_e32 v14, 0xffff, v16
+; GFX1170-NEXT:    v_and_b32_e32 v16, 0xffff, v18
+; GFX1170-NEXT:    v_lshl_or_b32 v12, v13, 16, v8
+; GFX1170-NEXT:    v_lshl_or_b32 v13, v15, 16, v9
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1170-NEXT:    v_lshl_or_b32 v14, v17, 16, v14
+; GFX1170-NEXT:    v_lshl_or_b32 v15, v19, 16, v16
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_wmma_f16_16x16x16_f16 v[12:15], v[0:3], v[4:7], v[12:15] neg_lo:[0,0,1]
+; GFX1170-NEXT:    global_store_b128 v[10:11], v[12:15], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negC_pack:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    s_clause 0x1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-imm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-imm.ll
index 6b749df71223f..8f8267952cbe1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-imm.ll
@@ -1,14 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s --check-prefix=GFX12
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1170 < %s | FileCheck %s --check-prefixes=GCN,GFX1170
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s --check-prefixes=GCN,GFX12
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_imm(<8 x half> %A, <8 x half> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[10:17], v[0:3], v[4:7], 1.0
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX12-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[10:17], v[0:3], v[4:7], 1.0
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GCN-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16.v8f16.v8f32(<8 x half> %A, <8 x half> %B, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -16,27 +17,27 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_imm_non_inlineable(<8 x half> %A, <8 x half> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_mov_b32 s0, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s7, s0
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    s_mov_b32 s2, s0
-; GFX12-NEXT:    s_mov_b32 s3, s0
-; GFX12-NEXT:    s_mov_b32 s4, s0
-; GFX12-NEXT:    s_mov_b32 s5, s0
-; GFX12-NEXT:    s_mov_b32 s6, s0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v17, s7 :: v_dual_mov_b32 v16, s6
-; GFX12-NEXT:    v_dual_mov_b32 v15, s5 :: v_dual_mov_b32 v14, s4
-; GFX12-NEXT:    v_dual_mov_b32 v13, s3 :: v_dual_mov_b32 v12, s2
-; GFX12-NEXT:    v_dual_mov_b32 v11, s1 :: v_dual_mov_b32 v10, s0
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[10:17], v[0:3], v[4:7], v[10:17]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX12-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_mov_b32 s0, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s7, s0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    s_mov_b32 s4, s0
+; GCN-NEXT:    s_mov_b32 s5, s0
+; GCN-NEXT:    s_mov_b32 s6, s0
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_dual_mov_b32 v17, s7 :: v_dual_mov_b32 v16, s6
+; GCN-NEXT:    v_dual_mov_b32 v15, s5 :: v_dual_mov_b32 v14, s4
+; GCN-NEXT:    v_dual_mov_b32 v13, s3 :: v_dual_mov_b32 v12, s2
+; GCN-NEXT:    v_dual_mov_b32 v11, s1 :: v_dual_mov_b32 v10, s0
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[10:17], v[0:3], v[4:7], v[10:17]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GCN-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16.v8f16.v8f32(<8 x half> %A, <8 x half> %B, <8 x float> <float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -44,13 +45,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf16_imm(<8 x i16> %A, <8 x i16> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf16_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf16 v[10:17], v[0:3], v[4:7], 1.0
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX12-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf16_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf16 v[10:17], v[0:3], v[4:7], 1.0
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GCN-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16.v8i16.v8f32(<8 x i16> %A, <8 x i16> %B, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -58,27 +59,27 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf16_imm_non_inlineable(<8 x i16> %A, <8 x i16> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf16_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_mov_b32 s0, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s7, s0
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    s_mov_b32 s2, s0
-; GFX12-NEXT:    s_mov_b32 s3, s0
-; GFX12-NEXT:    s_mov_b32 s4, s0
-; GFX12-NEXT:    s_mov_b32 s5, s0
-; GFX12-NEXT:    s_mov_b32 s6, s0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v17, s7 :: v_dual_mov_b32 v16, s6
-; GFX12-NEXT:    v_dual_mov_b32 v15, s5 :: v_dual_mov_b32 v14, s4
-; GFX12-NEXT:    v_dual_mov_b32 v13, s3 :: v_dual_mov_b32 v12, s2
-; GFX12-NEXT:    v_dual_mov_b32 v11, s1 :: v_dual_mov_b32 v10, s0
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf16 v[10:17], v[0:3], v[4:7], v[10:17]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX12-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf16_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_mov_b32 s0, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s7, s0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    s_mov_b32 s4, s0
+; GCN-NEXT:    s_mov_b32 s5, s0
+; GCN-NEXT:    s_mov_b32 s6, s0
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_dual_mov_b32 v17, s7 :: v_dual_mov_b32 v16, s6
+; GCN-NEXT:    v_dual_mov_b32 v15, s5 :: v_dual_mov_b32 v14, s4
+; GCN-NEXT:    v_dual_mov_b32 v13, s3 :: v_dual_mov_b32 v12, s2
+; GCN-NEXT:    v_dual_mov_b32 v11, s1 :: v_dual_mov_b32 v10, s0
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf16 v[10:17], v[0:3], v[4:7], v[10:17]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GCN-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16.v8i16.v8f32(<8 x i16> %A, <8 x i16> %B, <8 x float> <float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -86,11 +87,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_imm(<8 x half> %A, <8 x half> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[10:13], v[0:3], v[4:7], 1.0
-; GFX12-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[10:13], v[0:3], v[4:7], 1.0
+; GCN-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>, i1 0)
   store <8 x half> %res, ptr addrspace(1) %out
@@ -98,19 +99,19 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_imm_non_inlineable(<8 x half> %A, <8 x half> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_mov_b32 s0, 0x42004200
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s3, s0
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    s_mov_b32 s2, s0
-; GFX12-NEXT:    v_dual_mov_b32 v13, s3 :: v_dual_mov_b32 v12, s2
-; GFX12-NEXT:    v_dual_mov_b32 v11, s1 :: v_dual_mov_b32 v10, s0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[10:13], v[0:3], v[4:7], v[10:13]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_mov_b32 s0, 0x42004200
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    v_dual_mov_b32 v13, s3 :: v_dual_mov_b32 v12, s2
+; GCN-NEXT:    v_dual_mov_b32 v11, s1 :: v_dual_mov_b32 v10, s0
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[10:13], v[0:3], v[4:7], v[10:13]
+; GCN-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> <half 3.0, half 3.0, half 3.0, half 3.0, half 3.0, half 3.0, half 3.0, half 3.0>, i1 0)
   store <8 x half> %res, ptr addrspace(1) %out
@@ -118,19 +119,19 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_imm(<8 x i16> %A, <8 x i16> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_bf16_16x16x16_bf16_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_mov_b32 s0, 0x3f803f80
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s3, s0
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    s_mov_b32 s2, s0
-; GFX12-NEXT:    v_dual_mov_b32 v13, s3 :: v_dual_mov_b32 v12, s2
-; GFX12-NEXT:    v_dual_mov_b32 v11, s1 :: v_dual_mov_b32 v10, s0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_wmma_bf16_16x16x16_bf16 v[10:13], v[0:3], v[4:7], v[10:13]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_bf16_16x16x16_bf16_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_mov_b32 s0, 0x3f803f80
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    v_dual_mov_b32 v13, s3 :: v_dual_mov_b32 v12, s2
+; GCN-NEXT:    v_dual_mov_b32 v11, s1 :: v_dual_mov_b32 v10, s0
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    v_wmma_bf16_16x16x16_bf16 v[10:13], v[0:3], v[4:7], v[10:13]
+; GCN-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v8i16.v8i16.v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> <i16 16256, i16 16256, i16 16256, i16 16256, i16 16256, i16 16256, i16 16256, i16 16256>, i1 0)
   store <8 x i16> %res, ptr addrspace(1) %out
@@ -138,19 +139,19 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_imm_non_inlineable(<8 x i16> %A, <8 x i16> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_bf16_16x16x16_bf16_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_mov_b32 s0, 0x3fc03fc0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s3, s0
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    s_mov_b32 s2, s0
-; GFX12-NEXT:    v_dual_mov_b32 v13, s3 :: v_dual_mov_b32 v12, s2
-; GFX12-NEXT:    v_dual_mov_b32 v11, s1 :: v_dual_mov_b32 v10, s0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_wmma_bf16_16x16x16_bf16 v[10:13], v[0:3], v[4:7], v[10:13]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_bf16_16x16x16_bf16_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_mov_b32 s0, 0x3fc03fc0
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    v_dual_mov_b32 v13, s3 :: v_dual_mov_b32 v12, s2
+; GCN-NEXT:    v_dual_mov_b32 v11, s1 :: v_dual_mov_b32 v10, s0
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    v_wmma_bf16_16x16x16_bf16 v[10:13], v[0:3], v[4:7], v[10:13]
+; GCN-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v8i16.v8i16.v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> <i16 16320, i16 16320, i16 16320, i16 16320, i16 16320, i16 16320, i16 16320, i16 16320>, i1 0)
   store <8 x i16> %res, ptr addrspace(1) %out
@@ -158,13 +159,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_imm(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[6:13], v[0:1], v[2:3], 1
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[6:13], v[0:1], v[2:3], 1
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32.v2i32.v8i32(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -172,27 +173,27 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_imm_non_inlineable(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_movk_i32 s0, 0x80
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s7, s0
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    s_mov_b32 s2, s0
-; GFX12-NEXT:    s_mov_b32 s3, s0
-; GFX12-NEXT:    s_mov_b32 s4, s0
-; GFX12-NEXT:    s_mov_b32 s5, s0
-; GFX12-NEXT:    s_mov_b32 s6, s0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v13, s7 :: v_dual_mov_b32 v12, s6
-; GFX12-NEXT:    v_dual_mov_b32 v11, s5 :: v_dual_mov_b32 v10, s4
-; GFX12-NEXT:    v_dual_mov_b32 v9, s3 :: v_dual_mov_b32 v8, s2
-; GFX12-NEXT:    v_dual_mov_b32 v7, s1 :: v_dual_mov_b32 v6, s0
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[6:13], v[0:1], v[2:3], v[6:13]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_movk_i32 s0, 0x80
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s7, s0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    s_mov_b32 s4, s0
+; GCN-NEXT:    s_mov_b32 s5, s0
+; GCN-NEXT:    s_mov_b32 s6, s0
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_dual_mov_b32 v13, s7 :: v_dual_mov_b32 v12, s6
+; GCN-NEXT:    v_dual_mov_b32 v11, s5 :: v_dual_mov_b32 v10, s4
+; GCN-NEXT:    v_dual_mov_b32 v9, s3 :: v_dual_mov_b32 v8, s2
+; GCN-NEXT:    v_dual_mov_b32 v7, s1 :: v_dual_mov_b32 v6, s0
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[6:13], v[0:1], v[2:3], v[6:13]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32.v2i32.v8i32(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> <i32 128, i32 128, i32 128, i32 128, i32 128, i32 128, i32 128, i32 128>, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -200,13 +201,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4_imm(i32 %A, i32 %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[4:11], v0, v1, 1
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[2:3], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[4:11], v0, v1, 1
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[2:3], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32.i32.v8i32(i1 0, i32 %A, i1 0, i32 %B, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -214,27 +215,27 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4_imm_non_inlineable(i32 %A, i32 %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_movk_i32 s0, 0x80
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s7, s0
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    s_mov_b32 s2, s0
-; GFX12-NEXT:    s_mov_b32 s3, s0
-; GFX12-NEXT:    s_mov_b32 s4, s0
-; GFX12-NEXT:    s_mov_b32 s5, s0
-; GFX12-NEXT:    s_mov_b32 s6, s0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v11, s7 :: v_dual_mov_b32 v10, s6
-; GFX12-NEXT:    v_dual_mov_b32 v9, s5 :: v_dual_mov_b32 v8, s4
-; GFX12-NEXT:    v_dual_mov_b32 v7, s3 :: v_dual_mov_b32 v6, s2
-; GFX12-NEXT:    v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[4:11], v0, v1, v[4:11]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[2:3], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_movk_i32 s0, 0x80
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s7, s0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    s_mov_b32 s4, s0
+; GCN-NEXT:    s_mov_b32 s5, s0
+; GCN-NEXT:    s_mov_b32 s6, s0
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_dual_mov_b32 v11, s7 :: v_dual_mov_b32 v10, s6
+; GCN-NEXT:    v_dual_mov_b32 v9, s5 :: v_dual_mov_b32 v8, s4
+; GCN-NEXT:    v_dual_mov_b32 v7, s3 :: v_dual_mov_b32 v6, s2
+; GCN-NEXT:    v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[4:11], v0, v1, v[4:11]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[2:3], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32.i32.v8i32(i1 0, i32 %A, i1 0, i32 %B, <8 x i32> <i32 128, i32 128, i32 128, i32 128, i32 128, i32 128, i32 128, i32 128>, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -242,13 +243,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8_imm(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[6:13], v[0:1], v[2:3], 1.0
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_fp8_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[6:13], v[0:1], v[2:3], 1.0
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32.v2i32.v8f32(<2 x i32> %A, <2 x i32> %B, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -256,27 +257,27 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8_imm_non_inlineable(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_mov_b32 s0, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s7, s0
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    s_mov_b32 s2, s0
-; GFX12-NEXT:    s_mov_b32 s3, s0
-; GFX12-NEXT:    s_mov_b32 s4, s0
-; GFX12-NEXT:    s_mov_b32 s5, s0
-; GFX12-NEXT:    s_mov_b32 s6, s0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v13, s7 :: v_dual_mov_b32 v12, s6
-; GFX12-NEXT:    v_dual_mov_b32 v11, s5 :: v_dual_mov_b32 v10, s4
-; GFX12-NEXT:    v_dual_mov_b32 v9, s3 :: v_dual_mov_b32 v8, s2
-; GFX12-NEXT:    v_dual_mov_b32 v7, s1 :: v_dual_mov_b32 v6, s0
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[6:13], v[0:1], v[2:3], v[6:13]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_fp8_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_mov_b32 s0, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s7, s0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    s_mov_b32 s4, s0
+; GCN-NEXT:    s_mov_b32 s5, s0
+; GCN-NEXT:    s_mov_b32 s6, s0
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_dual_mov_b32 v13, s7 :: v_dual_mov_b32 v12, s6
+; GCN-NEXT:    v_dual_mov_b32 v11, s5 :: v_dual_mov_b32 v10, s4
+; GCN-NEXT:    v_dual_mov_b32 v9, s3 :: v_dual_mov_b32 v8, s2
+; GCN-NEXT:    v_dual_mov_b32 v7, s1 :: v_dual_mov_b32 v6, s0
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[6:13], v[0:1], v[2:3], v[6:13]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32.v2i32.v8f32(<2 x i32> %A, <2 x i32> %B, <8 x float> <float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -284,13 +285,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8_imm(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[6:13], v[0:1], v[2:3], 1.0
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_fp8_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[6:13], v[0:1], v[2:3], 1.0
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32.v2i32.v8f32(<2 x i32> %A, <2 x i32> %B, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -298,27 +299,27 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8_imm_non_inlineable(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_mov_b32 s0, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s7, s0
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    s_mov_b32 s2, s0
-; GFX12-NEXT:    s_mov_b32 s3, s0
-; GFX12-NEXT:    s_mov_b32 s4, s0
-; GFX12-NEXT:    s_mov_b32 s5, s0
-; GFX12-NEXT:    s_mov_b32 s6, s0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v13, s7 :: v_dual_mov_b32 v12, s6
-; GFX12-NEXT:    v_dual_mov_b32 v11, s5 :: v_dual_mov_b32 v10, s4
-; GFX12-NEXT:    v_dual_mov_b32 v9, s3 :: v_dual_mov_b32 v8, s2
-; GFX12-NEXT:    v_dual_mov_b32 v7, s1 :: v_dual_mov_b32 v6, s0
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[6:13], v[0:1], v[2:3], v[6:13]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_fp8_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_mov_b32 s0, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s7, s0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    s_mov_b32 s4, s0
+; GCN-NEXT:    s_mov_b32 s5, s0
+; GCN-NEXT:    s_mov_b32 s6, s0
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_dual_mov_b32 v13, s7 :: v_dual_mov_b32 v12, s6
+; GCN-NEXT:    v_dual_mov_b32 v11, s5 :: v_dual_mov_b32 v10, s4
+; GCN-NEXT:    v_dual_mov_b32 v9, s3 :: v_dual_mov_b32 v8, s2
+; GCN-NEXT:    v_dual_mov_b32 v7, s1 :: v_dual_mov_b32 v6, s0
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[6:13], v[0:1], v[2:3], v[6:13]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32.v2i32.v8f32(<2 x i32> %A, <2 x i32> %B, <8 x float> <float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -326,13 +327,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8_imm(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[6:13], v[0:1], v[2:3], 1.0
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_bf8_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[6:13], v[0:1], v[2:3], 1.0
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32.v2i32.v8f32(<2 x i32> %A, <2 x i32> %B, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -340,27 +341,27 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8_imm_non_inlineable(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_mov_b32 s0, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s7, s0
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    s_mov_b32 s2, s0
-; GFX12-NEXT:    s_mov_b32 s3, s0
-; GFX12-NEXT:    s_mov_b32 s4, s0
-; GFX12-NEXT:    s_mov_b32 s5, s0
-; GFX12-NEXT:    s_mov_b32 s6, s0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v13, s7 :: v_dual_mov_b32 v12, s6
-; GFX12-NEXT:    v_dual_mov_b32 v11, s5 :: v_dual_mov_b32 v10, s4
-; GFX12-NEXT:    v_dual_mov_b32 v9, s3 :: v_dual_mov_b32 v8, s2
-; GFX12-NEXT:    v_dual_mov_b32 v7, s1 :: v_dual_mov_b32 v6, s0
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[6:13], v[0:1], v[2:3], v[6:13]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_bf8_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_mov_b32 s0, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s7, s0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    s_mov_b32 s4, s0
+; GCN-NEXT:    s_mov_b32 s5, s0
+; GCN-NEXT:    s_mov_b32 s6, s0
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_dual_mov_b32 v13, s7 :: v_dual_mov_b32 v12, s6
+; GCN-NEXT:    v_dual_mov_b32 v11, s5 :: v_dual_mov_b32 v10, s4
+; GCN-NEXT:    v_dual_mov_b32 v9, s3 :: v_dual_mov_b32 v8, s2
+; GCN-NEXT:    v_dual_mov_b32 v7, s1 :: v_dual_mov_b32 v6, s0
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[6:13], v[0:1], v[2:3], v[6:13]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32.v2i32.v8f32(<2 x i32> %A, <2 x i32> %B, <8 x float> <float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -368,13 +369,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8_imm(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[6:13], v[0:1], v[2:3], 1.0
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_bf8_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[6:13], v[0:1], v[2:3], 1.0
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32.v2i32.v8f32(<2 x i32> %A, <2 x i32> %B, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -382,27 +383,27 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8_imm_non_inlineable(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_mov_b32 s0, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s7, s0
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    s_mov_b32 s2, s0
-; GFX12-NEXT:    s_mov_b32 s3, s0
-; GFX12-NEXT:    s_mov_b32 s4, s0
-; GFX12-NEXT:    s_mov_b32 s5, s0
-; GFX12-NEXT:    s_mov_b32 s6, s0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v13, s7 :: v_dual_mov_b32 v12, s6
-; GFX12-NEXT:    v_dual_mov_b32 v11, s5 :: v_dual_mov_b32 v10, s4
-; GFX12-NEXT:    v_dual_mov_b32 v9, s3 :: v_dual_mov_b32 v8, s2
-; GFX12-NEXT:    v_dual_mov_b32 v7, s1 :: v_dual_mov_b32 v6, s0
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[6:13], v[0:1], v[2:3], v[6:13]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_bf8_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_mov_b32 s0, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s7, s0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    s_mov_b32 s4, s0
+; GCN-NEXT:    s_mov_b32 s5, s0
+; GCN-NEXT:    s_mov_b32 s6, s0
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_dual_mov_b32 v13, s7 :: v_dual_mov_b32 v12, s6
+; GCN-NEXT:    v_dual_mov_b32 v11, s5 :: v_dual_mov_b32 v10, s4
+; GCN-NEXT:    v_dual_mov_b32 v9, s3 :: v_dual_mov_b32 v8, s2
+; GCN-NEXT:    v_dual_mov_b32 v7, s1 :: v_dual_mov_b32 v6, s0
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[6:13], v[0:1], v[2:3], v[6:13]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32.v2i32.v8f32(<2 x i32> %A, <2 x i32> %B, <8 x float> <float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -410,13 +411,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4_imm(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[6:13], v[0:1], v[2:3], 1
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[6:13], v[0:1], v[2:3], 1
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32.v2i32.v8i32(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -424,27 +425,27 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4_imm_non_inlineable(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_movk_i32 s0, 0x80
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s7, s0
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    s_mov_b32 s2, s0
-; GFX12-NEXT:    s_mov_b32 s3, s0
-; GFX12-NEXT:    s_mov_b32 s4, s0
-; GFX12-NEXT:    s_mov_b32 s5, s0
-; GFX12-NEXT:    s_mov_b32 s6, s0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v13, s7 :: v_dual_mov_b32 v12, s6
-; GFX12-NEXT:    v_dual_mov_b32 v11, s5 :: v_dual_mov_b32 v10, s4
-; GFX12-NEXT:    v_dual_mov_b32 v9, s3 :: v_dual_mov_b32 v8, s2
-; GFX12-NEXT:    v_dual_mov_b32 v7, s1 :: v_dual_mov_b32 v6, s0
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[6:13], v[0:1], v[2:3], v[6:13]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_movk_i32 s0, 0x80
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s7, s0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    s_mov_b32 s4, s0
+; GCN-NEXT:    s_mov_b32 s5, s0
+; GCN-NEXT:    s_mov_b32 s6, s0
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_dual_mov_b32 v13, s7 :: v_dual_mov_b32 v12, s6
+; GCN-NEXT:    v_dual_mov_b32 v11, s5 :: v_dual_mov_b32 v10, s4
+; GCN-NEXT:    v_dual_mov_b32 v9, s3 :: v_dual_mov_b32 v8, s2
+; GCN-NEXT:    v_dual_mov_b32 v7, s1 :: v_dual_mov_b32 v6, s0
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[6:13], v[0:1], v[2:3], v[6:13]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32.v2i32.v8i32(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> <i32 128, i32 128, i32 128, i32 128, i32 128, i32 128, i32 128, i32 128>, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -473,3 +474,6 @@ declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.v
 declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
 declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
 declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX1170: {{.*}}
+; GFX12: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-iu-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-iu-modifiers.ll
index 929a51bfff53c..37900d6db1027 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-iu-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-iu-modifiers.ll
@@ -1,14 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s --check-prefix=GFX12
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1170 < %s | FileCheck %s --check-prefixes=GCN,GFX1170
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s --check-prefixes=GCN,GFX12
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_zext_src0(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32.v2i32.v8i32(i1 1, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -16,13 +17,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_zext_src1(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32.v2i32.v8i32(i1 0, <2 x i32> %A, i1 1, <2 x i32> %B, <8 x i32> %C, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -30,13 +31,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] clamp
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] clamp
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32.v2i32.v8i32(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 1)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -46,13 +47,13 @@ bb:
 
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4_zext_src0(i32 %A, i32 %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] neg_lo:[1,0,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[10:11], v[2:5], off
-; GFX12-NEXT:    global_store_b128 v[10:11], v[6:9], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] neg_lo:[1,0,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[10:11], v[2:5], off
+; GCN-NEXT:    global_store_b128 v[10:11], v[6:9], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32.i32.v8i32(i1 1, i32 %A, i1 0, i32 %B, <8 x i32> %C, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -60,13 +61,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4_zext_src1(i32 %A, i32 %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] neg_lo:[0,1,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[10:11], v[2:5], off
-; GFX12-NEXT:    global_store_b128 v[10:11], v[6:9], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] neg_lo:[0,1,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[10:11], v[2:5], off
+; GCN-NEXT:    global_store_b128 v[10:11], v[6:9], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32.i32.v8i32(i1 0, i32 %A, i1 1, i32 %B, <8 x i32> %C, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -74,13 +75,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4_clamp(i32 %A, i32 %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] clamp
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[10:11], v[2:5], off
-; GFX12-NEXT:    global_store_b128 v[10:11], v[6:9], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] clamp
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[10:11], v[2:5], off
+; GCN-NEXT:    global_store_b128 v[10:11], v[6:9], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32.i32.v8i32(i1 0, i32 %A, i1 0, i32 %B, <8 x i32> %C, i1 1)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -90,13 +91,13 @@ bb:
 
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4_zext_src0(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32.v2i32.v8i32(i1 1, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -104,13 +105,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4_zext_src1(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32.v2i32.v8i32(i1 0, <2 x i32> %A, i1 1, <2 x i32> %B, <8 x i32> %C, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -118,13 +119,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] clamp
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] clamp
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32.v2i32.v8i32(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 1)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -136,13 +137,13 @@ bb:
 
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8_zext_src0(<2 x i32> %A, <4 x i32> %B, <8 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu8_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[1,0,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu8_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[1,0,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.v8i32.i16(i1 1, <2 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i16 %Index, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -150,13 +151,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8_zext_src1(<2 x i32> %A, <4 x i32> %B, <8 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu8_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,1,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu8_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,1,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.v8i32.i16(i1 0, <2 x i32> %A, i1 1, <4 x i32> %B, <8 x i32> %C, i16 %Index, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -164,13 +165,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8_clamp(<2 x i32> %A, <4 x i32> %B, <8 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu8_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 clamp
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu8_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 clamp
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.v8i32.i16(i1 0, <2 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i16 %Index, i1 1)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -180,13 +181,13 @@ bb:
 
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4_zext_src0(i32 %A, <2 x i32> %B, <8 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu4_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 neg_lo:[1,0,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[3:6], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[7:10], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu4_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 neg_lo:[1,0,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[3:6], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[7:10], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.v8i32.i16(i1 1, i32 %A, i1 0, <2 x i32> %B, <8 x i32> %C, i16 %Index, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -194,13 +195,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4_zext_src1(i32 %A, <2 x i32> %B, <8 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu4_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 neg_lo:[0,1,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[3:6], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[7:10], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu4_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 neg_lo:[0,1,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[3:6], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[7:10], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.v8i32.i16(i1 0, i32 %A, i1 1, <2 x i32> %B, <8 x i32> %C, i16 %Index, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -208,13 +209,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4_clamp(i32 %A, <2 x i32> %B, <8 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu4_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 clamp
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[3:6], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[7:10], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu4_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 clamp
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[3:6], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[7:10], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.v8i32.i16(i1 0, i32 %A, i1 0, <2 x i32> %B, <8 x i32> %C, i16 %Index, i1 1)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -224,13 +225,13 @@ bb:
 
 
 define amdgpu_ps void @test_swmmac_i32_16x16x64_iu4_zext_src0(<2 x i32> %A, <4 x i32> %B, <8 x i32> %C, i32 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x64_iu4_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 neg_lo:[1,0,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x64_iu4_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 neg_lo:[1,0,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.v8i32.i32(i1 1, <2 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i32 %Index, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -238,13 +239,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x64_iu4_zext_src1(<2 x i32> %A, <4 x i32> %B, <8 x i32> %C, i32 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x64_iu4_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,1,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x64_iu4_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,1,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.v8i32.i32(i1 0, <2 x i32> %A, i1 1, <4 x i32> %B, <8 x i32> %C, i32 %Index, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -252,13 +253,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x64_iu4_clamp(<2 x i32> %A, <4 x i32> %B, <8 x i32> %C, i32 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x64_iu4_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 clamp
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x64_iu4_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 clamp
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.v8i32.i32(i1 0, <2 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i32 %Index, i1 1)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -271,3 +272,6 @@ declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32.v2i32.v8i32(i1
 declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.v8i32.i16(i1 immarg, <2 x i32>, i1 immarg, <4 x i32>, <8 x i32>, i16 %Index, i1 immarg)
 declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.v8i32.i16(i1 immarg, i32, i1 immarg, <2 x i32>, <8 x i32>, i16 %Index, i1 immarg)
 declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.v8i32.i32(i1 immarg, <2 x i32>, i1 immarg, <4 x i32>, <8 x i32>, i32 %Index, i1 immarg)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX1170: {{.*}}
+; GFX12: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-swmmac-index_key.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-swmmac-index_key.ll
index 7c0f72606a5ba..a3d0da7dfc143 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-swmmac-index_key.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-swmmac-index_key.ll
@@ -1,7 +1,27 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s --check-prefix=GFX12
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1170 < %s | FileCheck %s --check-prefixes=GCN,GFX1170
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s --check-prefixes=GCN,GFX12
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_index_key(<8 x half> %A, <16 x half> %B, <8 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_f16_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v20, v[20:21], off
+; GFX1170-NEXT:    v_dual_mov_b32 v33, v19 :: v_dual_mov_b32 v32, v18
+; GFX1170-NEXT:    v_dual_mov_b32 v31, v17 :: v_dual_mov_b32 v30, v16
+; GFX1170-NEXT:    v_dual_mov_b32 v29, v15 :: v_dual_mov_b32 v28, v14
+; GFX1170-NEXT:    v_dual_mov_b32 v27, v13 :: v_dual_mov_b32 v26, v12
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_f16 v[26:33], v[0:3], v[4:11], v20
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 index_key:1
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[22:23], v[26:29], off
+; GFX1170-NEXT:    global_store_b128 v[22:23], v[30:33], off offset:16
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[24:25], v[12:15], off
+; GFX1170-NEXT:    global_store_b128 v[24:25], v[16:19], off offset:16
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v20, v[20:21], off
@@ -32,6 +52,25 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf16_index_key(<8 x i16> %A, <16 x i16> %B, <8 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_bf16_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v20, v[20:21], off
+; GFX1170-NEXT:    v_dual_mov_b32 v33, v19 :: v_dual_mov_b32 v32, v18
+; GFX1170-NEXT:    v_dual_mov_b32 v31, v17 :: v_dual_mov_b32 v30, v16
+; GFX1170-NEXT:    v_dual_mov_b32 v29, v15 :: v_dual_mov_b32 v28, v14
+; GFX1170-NEXT:    v_dual_mov_b32 v27, v13 :: v_dual_mov_b32 v26, v12
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf16 v[26:33], v[0:3], v[4:11], v20
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 index_key:1
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[22:23], v[26:29], off
+; GFX1170-NEXT:    global_store_b128 v[22:23], v[30:33], off offset:16
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[24:25], v[12:15], off
+; GFX1170-NEXT:    global_store_b128 v[24:25], v[16:19], off offset:16
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_bf16_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v20, v[20:21], off
@@ -62,6 +101,19 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f16_16x16x32_f16_index_key(<8 x half> %A, <16 x half> %B, <8 x half> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_f16_16x16x32_f16_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v16, v[16:17], off
+; GFX1170-NEXT:    v_dual_mov_b32 v25, v15 :: v_dual_mov_b32 v24, v14
+; GFX1170-NEXT:    v_dual_mov_b32 v23, v13 :: v_dual_mov_b32 v22, v12
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_f16_16x16x32_f16 v[22:25], v[0:3], v[4:11], v16
+; GFX1170-NEXT:    v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 index_key:1
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[22:25], off
+; GFX1170-NEXT:    global_store_b128 v[20:21], v[12:15], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f16_16x16x32_f16_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v16, v[16:17], off
@@ -86,6 +138,19 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_bf16_16x16x32_bf16_index_key(<8 x i16> %A, <16 x i16> %B, <8 x i16> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_bf16_16x16x32_bf16_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v16, v[16:17], off
+; GFX1170-NEXT:    v_dual_mov_b32 v25, v15 :: v_dual_mov_b32 v24, v14
+; GFX1170-NEXT:    v_dual_mov_b32 v23, v13 :: v_dual_mov_b32 v22, v12
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_bf16_16x16x32_bf16 v[22:25], v[0:3], v[4:11], v16
+; GFX1170-NEXT:    v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 index_key:1
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[22:25], off
+; GFX1170-NEXT:    global_store_b128 v[20:21], v[12:15], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_bf16_16x16x32_bf16_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v16, v[16:17], off
@@ -110,6 +175,25 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8_index_key(<2 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_i32_16x16x32_iu8_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v14, v[14:15], off
+; GFX1170-NEXT:    v_dual_mov_b32 v27, v13 :: v_dual_mov_b32 v26, v12
+; GFX1170-NEXT:    v_dual_mov_b32 v25, v11 :: v_dual_mov_b32 v24, v10
+; GFX1170-NEXT:    v_dual_mov_b32 v23, v9 :: v_dual_mov_b32 v22, v8
+; GFX1170-NEXT:    v_dual_mov_b32 v21, v7 :: v_dual_mov_b32 v20, v6
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_i32_16x16x32_iu8 v[20:27], v[0:1], v[2:5], v14
+; GFX1170-NEXT:    v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 index_key:1
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[20:23], off
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[24:27], off offset:16
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[6:9], off
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[10:13], off offset:16
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_i32_16x16x32_iu8_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v14, v[14:15], off
@@ -140,6 +224,25 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4_index_key(i32 %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_i32_16x16x32_iu4_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v11, v[11:12], off
+; GFX1170-NEXT:    v_dual_mov_b32 v24, v10 :: v_dual_mov_b32 v23, v9
+; GFX1170-NEXT:    v_dual_mov_b32 v22, v8 :: v_dual_mov_b32 v21, v7
+; GFX1170-NEXT:    v_dual_mov_b32 v20, v6 :: v_dual_mov_b32 v19, v5
+; GFX1170-NEXT:    v_dual_mov_b32 v18, v4 :: v_dual_mov_b32 v17, v3
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_i32_16x16x32_iu4 v[17:24], v0, v[1:2], v11
+; GFX1170-NEXT:    v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 index_key:1
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[13:14], v[17:20], off
+; GFX1170-NEXT:    global_store_b128 v[13:14], v[21:24], off offset:16
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[15:16], v[3:6], off
+; GFX1170-NEXT:    global_store_b128 v[15:16], v[7:10], off offset:16
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_i32_16x16x32_iu4_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v11, v[11:12], off
@@ -170,6 +273,25 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_fp8_index_key(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_fp8_fp8_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v14, v[14:15], off
+; GFX1170-NEXT:    v_dual_mov_b32 v27, v13 :: v_dual_mov_b32 v26, v12
+; GFX1170-NEXT:    v_dual_mov_b32 v25, v11 :: v_dual_mov_b32 v24, v10
+; GFX1170-NEXT:    v_dual_mov_b32 v23, v9 :: v_dual_mov_b32 v22, v8
+; GFX1170-NEXT:    v_dual_mov_b32 v21, v7 :: v_dual_mov_b32 v20, v6
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_fp8 v[20:27], v[0:1], v[2:5], v14
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 index_key:1
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[20:23], off
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[24:27], off offset:16
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[6:9], off
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[10:13], off offset:16
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_fp8_fp8_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v14, v[14:15], off
@@ -200,6 +322,25 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_bf8_index_key(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_fp8_bf8_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v14, v[14:15], off
+; GFX1170-NEXT:    v_dual_mov_b32 v27, v13 :: v_dual_mov_b32 v26, v12
+; GFX1170-NEXT:    v_dual_mov_b32 v25, v11 :: v_dual_mov_b32 v24, v10
+; GFX1170-NEXT:    v_dual_mov_b32 v23, v9 :: v_dual_mov_b32 v22, v8
+; GFX1170-NEXT:    v_dual_mov_b32 v21, v7 :: v_dual_mov_b32 v20, v6
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_bf8 v[20:27], v[0:1], v[2:5], v14
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 index_key:1
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[20:23], off
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[24:27], off offset:16
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[6:9], off
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[10:13], off offset:16
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_fp8_bf8_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v14, v[14:15], off
@@ -230,6 +371,25 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_fp8_index_key(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_bf8_fp8_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v14, v[14:15], off
+; GFX1170-NEXT:    v_dual_mov_b32 v27, v13 :: v_dual_mov_b32 v26, v12
+; GFX1170-NEXT:    v_dual_mov_b32 v25, v11 :: v_dual_mov_b32 v24, v10
+; GFX1170-NEXT:    v_dual_mov_b32 v23, v9 :: v_dual_mov_b32 v22, v8
+; GFX1170-NEXT:    v_dual_mov_b32 v21, v7 :: v_dual_mov_b32 v20, v6
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_fp8 v[20:27], v[0:1], v[2:5], v14
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 index_key:1
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[20:23], off
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[24:27], off offset:16
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[6:9], off
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[10:13], off offset:16
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_bf8_fp8_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v14, v[14:15], off
@@ -260,6 +420,25 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_bf8_index_key(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_bf8_bf8_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v14, v[14:15], off
+; GFX1170-NEXT:    v_dual_mov_b32 v27, v13 :: v_dual_mov_b32 v26, v12
+; GFX1170-NEXT:    v_dual_mov_b32 v25, v11 :: v_dual_mov_b32 v24, v10
+; GFX1170-NEXT:    v_dual_mov_b32 v23, v9 :: v_dual_mov_b32 v22, v8
+; GFX1170-NEXT:    v_dual_mov_b32 v21, v7 :: v_dual_mov_b32 v20, v6
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_bf8 v[20:27], v[0:1], v[2:5], v14
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 index_key:1
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[20:23], off
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[24:27], off offset:16
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[6:9], off
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[10:13], off offset:16
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_bf8_bf8_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v14, v[14:15], off
@@ -299,3 +478,5 @@ declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.v
 declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
 declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
 declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32.ll
index da61bc4758879..4eacdbe171e3e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32.ll
@@ -1,14 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s --check-prefix=GFX12
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1170 < %s | FileCheck %s --check-prefixes=GCN,GFX1170
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s --check-prefixes=GCN,GFX12
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[16:17], v[8:11], off
-; GFX12-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[16:17], v[8:11], off
+; GCN-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16.v8f16.v8f32(<8 x half> %A, <8 x half> %B, <8 x float> %C)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -16,13 +17,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<8 x i16> %A, <8 x i16> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[16:17], v[8:11], off
-; GFX12-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[16:17], v[8:11], off
+; GCN-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16.v8i16.v8f32(<8 x i16> %A, <8 x i16> %B, <8 x float> %C)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -30,11 +31,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16(<8 x half> %A, <8 x half> %B, <8 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11]
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11]
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> %C, i1 0)
   store <8 x half> %res, ptr addrspace(1) %out
@@ -42,11 +43,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_bf16_16x16x16_bf16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11]
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_bf16_16x16x16_bf16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11]
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v8i16.v8i16.v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i1 0)
   store <8 x i16> %res, ptr addrspace(1) %out
@@ -54,13 +55,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32.v2i32.v8i32(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -68,13 +69,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4(i32 %A, i32 %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[10:11], v[2:5], off
-; GFX12-NEXT:    global_store_b128 v[10:11], v[6:9], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[10:11], v[2:5], off
+; GCN-NEXT:    global_store_b128 v[10:11], v[6:9], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32.i32.v8i32(i1 0, i32 %A, i1 0, i32 %B, <8 x i32> %C, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -82,13 +83,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_fp8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32.v2i32.v8f32(<2 x i32> %A, <2 x i32> %B, <8 x float> %C)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -96,13 +97,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_fp8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32.v2i32.v8f32(<2 x i32> %A, <2 x i32> %B, <8 x float> %C)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -110,13 +111,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_bf8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32.v2i32.v8f32(<2 x i32> %A, <2 x i32> %B, <8 x float> %C)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -124,13 +125,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_bf8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32.v2i32.v8f32(<2 x i32> %A, <2 x i32> %B, <8 x float> %C)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -138,13 +139,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32.v2i32.v8i32(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -153,13 +154,13 @@ bb:
 
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16(<8 x half> %A, <16 x half> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_f16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[21:22], v[12:15], off
-; GFX12-NEXT:    global_store_b128 v[21:22], v[16:19], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_f16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[21:22], v[12:15], off
+; GCN-NEXT:    global_store_b128 v[21:22], v[16:19], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.v8f32.i16(<8 x half> %A, <16 x half> %B, <8 x float> %C, i16 %Index)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -167,13 +168,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf16(<8 x i16> %A, <16 x i16> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_bf16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[21:22], v[12:15], off
-; GFX12-NEXT:    global_store_b128 v[21:22], v[16:19], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_bf16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[21:22], v[12:15], off
+; GCN-NEXT:    global_store_b128 v[21:22], v[16:19], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v8f32.v8i16.v16i16.v8f32.i16(<8 x i16> %A, <16 x i16> %B, <8 x float> %C, i16 %Index)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -181,11 +182,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f16_16x16x32_f16(<8 x half> %A, <16 x half> %B, <8 x half> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f16_16x16x32_f16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16
-; GFX12-NEXT:    global_store_b128 v[17:18], v[12:15], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f16_16x16x32_f16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16
+; GCN-NEXT:    global_store_b128 v[17:18], v[12:15], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.v8f16.i16(<8 x half> %A, <16 x half> %B, <8 x half> %C, i16 %Index)
   store <8 x half> %res, ptr addrspace(1) %out
@@ -193,11 +194,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_bf16_16x16x32_bf16(<8 x i16> %A, <16 x i16> %B, <8 x i16> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_bf16_16x16x32_bf16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16
-; GFX12-NEXT:    global_store_b128 v[17:18], v[12:15], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_bf16_16x16x32_bf16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16
+; GCN-NEXT:    global_store_b128 v[17:18], v[12:15], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v8i16.v8i16.v16i16.v8i16.i16(<8 x i16> %A, <16 x i16> %B, <8 x i16> %C, i16 %Index)
   store <8 x i16> %res, ptr addrspace(1) %out
@@ -205,13 +206,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8(<2 x i32> %A, <4 x i32> %B, <8 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.v8i32.i16(i1 0, <2 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i16 %Index, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -219,13 +220,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4(i32 %A, <2 x i32> %B, <8 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu4:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[3:6], off
-; GFX12-NEXT:    global_store_b128 v[12:13], v[7:10], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu4:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[3:6], off
+; GCN-NEXT:    global_store_b128 v[12:13], v[7:10], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.v8i32.i16(i1 0, i32 %A, i1 0, <2 x i32> %B, <8 x i32> %C, i16 %Index, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -233,13 +234,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x64_iu4(<2 x i32> %A, <4 x i32> %B, <8 x i32> %C, i32 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x64_iu4:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x64_iu4:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.v8i32.i32(i1 0, <2 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i32 %Index, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -247,13 +248,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_fp8(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_fp8_fp8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_fp8_fp8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -261,13 +262,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_bf8(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_fp8_bf8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_fp8_bf8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -275,13 +276,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_fp8(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_bf8_fp8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_bf8_fp8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -289,13 +290,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_bf8(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_bf8_bf8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_bf8_bf8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -324,3 +325,6 @@ declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.v
 declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
 declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
 declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX1170: {{.*}}
+; GFX12: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
index a345ee6def7a7..3886a072b1763 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
@@ -1,12 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 < %s | FileCheck %s --check-prefix=GFX12
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1170 -mattr=+wavefrontsize64,-real-true16 < %s | FileCheck %s --check-prefixes=GCN,GFX1170
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 < %s | FileCheck %s --check-prefixes=GCN,GFX12
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negA(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negA:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_negA:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[8:9], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.A = fneg <4 x half> %A
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f16.v4f32(<4 x half> %fneg.A, <4 x half> %B, <4 x float> %C)
@@ -15,11 +16,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negB(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negB:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_negB:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[8:9], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.B = fneg <4 x half> %B
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f16.v4f32(<4 x half> %A, <4 x half> %fneg.B, <4 x float> %C)
@@ -28,11 +29,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negC(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[8:9], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <4 x float> %C
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f16.v4f32(<4 x half> %A, <4 x half> %B, <4 x float> %fneg.C)
@@ -41,11 +42,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_absC(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[8:9], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <4 x float> @llvm.fabs.v4f32(<4 x float> %C)
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f16.v4f32(<4 x half> %A, <4 x half> %B, <4 x float> %fabs.C)
@@ -54,11 +55,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf16_negC(<4 x i16> %A, <4 x i16> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf16_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf16_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[8:9], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <4 x float> %C
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4i16.v4f32(<4 x i16> %A, <4 x i16> %B, <4 x float> %fneg.C)
@@ -67,11 +68,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf16_absC(<4 x i16> %A, <4 x i16> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf16_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf16_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[8:9], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <4 x float> @llvm.fabs.v4f32(<4 x float> %C)
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4i16.v4f32(<4 x i16> %A, <4 x i16> %B, <4 x float> %fabs.C)
@@ -80,11 +81,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negA(<4 x half> %A, <4 x half> %B, <4 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negA:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT:    global_store_b64 v[6:7], v[4:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_negA:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GCN-NEXT:    global_store_b64 v[6:7], v[4:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.A = fneg <4 x half> %A
   %res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half> %fneg.A, <4 x half> %B, <4 x half> %C, i1 0)
@@ -93,11 +94,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negB(<4 x half> %A, <4 x half> %B, <4 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negB:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT:    global_store_b64 v[6:7], v[4:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_negB:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GCN-NEXT:    global_store_b64 v[6:7], v[4:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.B = fneg <4 x half> %B
   %res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half> %A, <4 x half> %fneg.B, <4 x half> %C, i1 0)
@@ -106,11 +107,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC(<4 x half> %A, <4 x half> %B, <4 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,0,1]
-; GFX12-NEXT:    global_store_b64 v[6:7], v[4:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,0,1]
+; GCN-NEXT:    global_store_b64 v[6:7], v[4:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <4 x half> %C
   %res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half> %A, <4 x half> %B, <4 x half> %fneg.C, i1 0)
@@ -119,11 +120,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_absC(<4 x half> %A, <4 x half> %B, <4 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b64 v[6:7], v[4:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b64 v[6:7], v[4:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <4 x half> @llvm.fabs.v4f16(<4 x half> %C)
   %res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half> %A, <4 x half> %B, <4 x half> %fabs.C, i1 0)
@@ -132,11 +133,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8_negC(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_fp8_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <4 x float> %C
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.i32.v4f32(i32 %A, i32 %B, <4 x float> %fneg.C)
@@ -145,11 +146,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8_absC(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_fp8_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <4 x float> @llvm.fabs.v4f32(<4 x float> %C)
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.i32.v4f32(i32 %A, i32 %B, <4 x float> %fabs.C)
@@ -158,11 +159,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8_negC(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_fp8_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <4 x float> %C
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.i32.v4f32(i32 %A, i32 %B, <4 x float> %fneg.C)
@@ -171,11 +172,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8_absC(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_fp8_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <4 x float> @llvm.fabs.v4f32(<4 x float> %C)
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.i32.v4f32(i32 %A, i32 %B, <4 x float> %fabs.C)
@@ -184,11 +185,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8_negC(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_bf8_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <4 x float> %C
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.i32.v4f32(i32 %A, i32 %B, <4 x float> %fneg.C)
@@ -197,11 +198,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8_absC(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_bf8_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <4 x float> @llvm.fabs.v4f32(<4 x float> %C)
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.i32.v4f32(i32 %A, i32 %B, <4 x float> %fabs.C)
@@ -210,11 +211,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8_negC(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_bf8_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <4 x float> %C
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.i32.v4f32(i32 %A, i32 %B, <4 x float> %fneg.C)
@@ -223,11 +224,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8_absC(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_bf8_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <4 x float> @llvm.fabs.v4f32(<4 x float> %C)
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.i32.v4f32(i32 %A, i32 %B, <4 x float> %fabs.C)
@@ -236,11 +237,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_negA(<4 x half> %A, <8 x half> %B, <4 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_negA:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[11:12], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_f16_negA:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[11:12], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.A = fneg <4 x half> %A
   %res = call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f16.v8f16.v4f32.i16(<4 x half> %fneg.A, <8 x half> %B, <4 x float> %C, i16 %Index)
@@ -249,11 +250,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_negB(<4 x half> %A, <8 x half> %B, <4 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_negB:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[11:12], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_f16_negB:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[11:12], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.B = fneg <8 x half> %B
   %res = call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f16.v8f16.v4f32.i16(<4 x half> %A, <8 x half> %fneg.B, <4 x float> %C, i16 %Index)
@@ -262,11 +263,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f16_16x16x32_f16_negA(<4 x half> %A, <8 x half> %B, <4 x half> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f16_16x16x32_f16_negA:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT:    global_store_b64 v[9:10], v[6:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f16_16x16x32_f16_negA:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GCN-NEXT:    global_store_b64 v[9:10], v[6:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.A = fneg <4 x half> %A
   %res = call <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v8f16.v4f16.i16(<4 x half> %fneg.A, <8 x half> %B, <4 x half> %C, i16 %Index)
@@ -275,11 +276,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f16_16x16x32_f16_negB(<4 x half> %A, <8 x half> %B, <4 x half> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f16_16x16x32_f16_negB:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT:    global_store_b64 v[9:10], v[6:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f16_16x16x32_f16_negB:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GCN-NEXT:    global_store_b64 v[9:10], v[6:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.B = fneg <8 x half> %B
   %res = call <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v8f16.v4f16.i16(<4 x half> %A, <8 x half> %fneg.B, <4 x half> %C, i16 %Index)
@@ -290,11 +291,11 @@ bb:
 ; both neg and abs patterns (wmma matrix C f32 or f16 )
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negabsC(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negabsC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_negabsC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[8:9], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <4 x float> @llvm.fabs.v4f32(<4 x float> %C)
   %fneg.fabs.C = fneg <4 x float> %fabs.C
@@ -304,11 +305,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negabsC(<4 x half> %A, <4 x half> %B, <4 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negabsC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b64 v[6:7], v[4:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_negabsC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b64 v[6:7], v[4:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <4 x half> @llvm.fabs.v4f16(<4 x half> %C)
   %fneg.fabs.C = fneg <4 x half> %fabs.C
@@ -318,13 +319,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_neg_partial_fabsA(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_neg_partial_fabsA:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_and_b32_e32 v7, 0x7fffffff, v7
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_neg_partial_fabsA:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_and_b32_e32 v7, 0x7fffffff, v7
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[8:9], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %el3 = extractelement <4 x float> %C, i32 3
   %el3.fabs = call float @llvm.fabs.f32(float %el3)
@@ -338,11 +339,11 @@ bb:
 ; A or B matrix modifier and constant in C
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negA_constantC(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negA_constantC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[6:9], v[0:1], v[2:3], 1.0 neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_negA_constantC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[6:9], v[0:1], v[2:3], 1.0 neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.A = fneg <4 x half> %A
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f16.v4f32(<4 x half> %fneg.A, <4 x half> %B, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
@@ -351,11 +352,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negB_constantC(<4 x half> %A, <4 x half> %B, <4 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negB_constantC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[6:7], v[0:1], v[2:3], 1.0 neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT:    global_store_b64 v[4:5], v[6:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_negB_constantC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[6:7], v[0:1], v[2:3], 1.0 neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GCN-NEXT:    global_store_b64 v[4:5], v[6:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.B = fneg <4 x half> %B
   %res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half> %A, <4 x half> %fneg.B, <4 x half> <half 1.0, half 1.0, half 1.0, half 1.0>, i1 0)
@@ -366,6 +367,20 @@ bb:
 ; pack f16 elements with v_perm_b32 since they don't come from same b32
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<4 x half> %A, <4 x half> %B, ptr %Caddr, ptr addrspace(1) %out) {
+; GFX1170-LABEL: test_wmma_f16_16x16x16_f16_negC_pack:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    flat_load_b128 v[8:11], v[4:5]
+; GFX1170-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX1170-NEXT:    v_and_b32_e32 v4, 0xffff, v8
+; GFX1170-NEXT:    v_and_b32_e32 v5, 0xffff, v10
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1170-NEXT:    v_lshl_or_b32 v4, v9, 16, v4
+; GFX1170-NEXT:    v_lshl_or_b32 v5, v11, 16, v5
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,0,1]
+; GFX1170-NEXT:    global_store_b64 v[6:7], v[4:5], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negC_pack:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    flat_load_b128 v[8:11], v[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-imm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-imm.ll
index 5344ab8da1ade..ce9b8f9fc3c14 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-imm.ll
@@ -1,12 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefix=GFX12
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1170 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GCN,GFX1170
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GCN,GFX12
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_imm(<4 x half> %A, <4 x half> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[6:9], v[0:1], v[2:3], 1.0
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[6:9], v[0:1], v[2:3], 1.0
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float>@llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16.v4f16.v4f32(<4 x half> %A, <4 x half> %B, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -14,21 +15,21 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_imm_non_inlineable(<4 x half> %A, <4 x half> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_mov_b32 s0, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s3, s0
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    s_mov_b32 s2, s0
-; GFX12-NEXT:    v_mov_b32_e32 v9, s3
-; GFX12-NEXT:    v_mov_b32_e32 v8, s2
-; GFX12-NEXT:    v_mov_b32_e32 v7, s1
-; GFX12-NEXT:    v_mov_b32_e32 v6, s0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[6:9], v[0:1], v[2:3], v[6:9]
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_mov_b32 s0, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    v_mov_b32_e32 v9, s3
+; GCN-NEXT:    v_mov_b32_e32 v8, s2
+; GCN-NEXT:    v_mov_b32_e32 v7, s1
+; GCN-NEXT:    v_mov_b32_e32 v6, s0
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[6:9], v[0:1], v[2:3], v[6:9]
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float>@llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16.v4f16.v4f32(<4 x half> %A, <4 x half> %B, <4 x float> <float 3.0, float 3.0, float 3.0, float 3.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -36,11 +37,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf16_imm(<4 x i16> %A, <4 x i16> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf16_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf16 v[6:9], v[0:1], v[2:3], 1.0
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf16_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf16 v[6:9], v[0:1], v[2:3], 1.0
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4i16.v4i16.v4f32(<4 x i16> %A, <4 x i16> %B, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -48,21 +49,21 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf16_imm_non_inlineable(<4 x i16> %A, <4 x i16> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf16_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_mov_b32 s0, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s3, s0
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    s_mov_b32 s2, s0
-; GFX12-NEXT:    v_mov_b32_e32 v9, s3
-; GFX12-NEXT:    v_mov_b32_e32 v8, s2
-; GFX12-NEXT:    v_mov_b32_e32 v7, s1
-; GFX12-NEXT:    v_mov_b32_e32 v6, s0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf16 v[6:9], v[0:1], v[2:3], v[6:9]
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf16_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_mov_b32 s0, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    v_mov_b32_e32 v9, s3
+; GCN-NEXT:    v_mov_b32_e32 v8, s2
+; GCN-NEXT:    v_mov_b32_e32 v7, s1
+; GCN-NEXT:    v_mov_b32_e32 v6, s0
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf16 v[6:9], v[0:1], v[2:3], v[6:9]
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4i16.v4i16.v4f32(<4 x i16> %A, <4 x i16> %B, <4 x float> <float 3.0, float 3.0, float 3.0, float 3.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -70,11 +71,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_imm(<4 x half> %A, <4 x half> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[6:7], v[0:1], v[2:3], 1.0
-; GFX12-NEXT:    global_store_b64 v[4:5], v[6:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[6:7], v[0:1], v[2:3], 1.0
+; GCN-NEXT:    global_store_b64 v[4:5], v[6:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v4f16.v4f16.v4f16.v4f16(<4 x half> %A, <4 x half> %B, <4 x half> <half 1.0, half 1.0, half 1.0, half 1.0>, i1 0)
   store <4 x half> %res, ptr addrspace(1) %out
@@ -82,17 +83,17 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_imm_non_inlineable(<4 x half> %A, <4 x half> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_mov_b32 s0, 0x42004200
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    v_mov_b32_e32 v7, s1
-; GFX12-NEXT:    v_mov_b32_e32 v6, s0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[6:7], v[0:1], v[2:3], v[6:7]
-; GFX12-NEXT:    global_store_b64 v[4:5], v[6:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_mov_b32 s0, 0x42004200
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    v_mov_b32_e32 v7, s1
+; GCN-NEXT:    v_mov_b32_e32 v6, s0
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[6:7], v[0:1], v[2:3], v[6:7]
+; GCN-NEXT:    global_store_b64 v[4:5], v[6:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v4f16.v4f16.v4f16.v4f16(<4 x half> %A, <4 x half> %B, <4 x half> <half 3.0, half 3.0, half 3.0, half 3.0>, i1 0)
   store <4 x half> %res, ptr addrspace(1) %out
@@ -100,17 +101,17 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_imm(<4 x i16> %A, <4 x i16> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_bf16_16x16x16_bf16_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_mov_b32 s0, 0x3f803f80
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    v_mov_b32_e32 v7, s1
-; GFX12-NEXT:    v_mov_b32_e32 v6, s0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_wmma_bf16_16x16x16_bf16 v[6:7], v[0:1], v[2:3], v[6:7]
-; GFX12-NEXT:    global_store_b64 v[4:5], v[6:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_bf16_16x16x16_bf16_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_mov_b32 s0, 0x3f803f80
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    v_mov_b32_e32 v7, s1
+; GCN-NEXT:    v_mov_b32_e32 v6, s0
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    v_wmma_bf16_16x16x16_bf16 v[6:7], v[0:1], v[2:3], v[6:7]
+; GCN-NEXT:    global_store_b64 v[4:5], v[6:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v4i16.v4i16.v4i16.v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> <i16 16256, i16 16256, i16 16256, i16 16256>, i1 0)
   store <4 x i16> %res, ptr addrspace(1) %out
@@ -118,17 +119,17 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_imm_non_inlineable(<4 x i16> %A, <4 x i16> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_bf16_16x16x16_bf16_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_mov_b32 s0, 0x3fc03fc0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    v_mov_b32_e32 v7, s1
-; GFX12-NEXT:    v_mov_b32_e32 v6, s0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_wmma_bf16_16x16x16_bf16 v[6:7], v[0:1], v[2:3], v[6:7]
-; GFX12-NEXT:    global_store_b64 v[4:5], v[6:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_bf16_16x16x16_bf16_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_mov_b32 s0, 0x3fc03fc0
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    v_mov_b32_e32 v7, s1
+; GCN-NEXT:    v_mov_b32_e32 v6, s0
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    v_wmma_bf16_16x16x16_bf16 v[6:7], v[0:1], v[2:3], v[6:7]
+; GCN-NEXT:    global_store_b64 v[4:5], v[6:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v4i16.v4i16.v4i16.v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> <i16 16320, i16 16320, i16 16320, i16 16320>, i1 0)
   store <4 x i16> %res, ptr addrspace(1) %out
@@ -136,11 +137,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_imm(i32 %A, i32 %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:7], v0, v1, 1
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:7], v0, v1, 1
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32.i32.v4i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -148,21 +149,21 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_imm_non_inlineable(i32 %A, i32 %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_movk_i32 s0, 0x80
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s3, s0
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    s_mov_b32 s2, s0
-; GFX12-NEXT:    v_mov_b32_e32 v7, s3
-; GFX12-NEXT:    v_mov_b32_e32 v6, s2
-; GFX12-NEXT:    v_mov_b32_e32 v5, s1
-; GFX12-NEXT:    v_mov_b32_e32 v4, s0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:7], v0, v1, v[4:7]
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_movk_i32 s0, 0x80
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    v_mov_b32_e32 v7, s3
+; GCN-NEXT:    v_mov_b32_e32 v6, s2
+; GCN-NEXT:    v_mov_b32_e32 v5, s1
+; GCN-NEXT:    v_mov_b32_e32 v4, s0
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:7], v0, v1, v[4:7]
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32.i32.v4i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> <i32 128, i32 128, i32 128, i32 128>, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -170,11 +171,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4_imm(i32 %A, i32 %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[4:7], v0, v1, 1
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[4:7], v0, v1, 1
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32.i32.v4i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -182,21 +183,21 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4_imm_non_inlineable(i32 %A, i32 %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_movk_i32 s0, 0x80
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s3, s0
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    s_mov_b32 s2, s0
-; GFX12-NEXT:    v_mov_b32_e32 v7, s3
-; GFX12-NEXT:    v_mov_b32_e32 v6, s2
-; GFX12-NEXT:    v_mov_b32_e32 v5, s1
-; GFX12-NEXT:    v_mov_b32_e32 v4, s0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[4:7], v0, v1, v[4:7]
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_movk_i32 s0, 0x80
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    v_mov_b32_e32 v7, s3
+; GCN-NEXT:    v_mov_b32_e32 v6, s2
+; GCN-NEXT:    v_mov_b32_e32 v5, s1
+; GCN-NEXT:    v_mov_b32_e32 v4, s0
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[4:7], v0, v1, v[4:7]
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32.i32.v4i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> <i32 128, i32 128, i32 128, i32 128>, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -204,11 +205,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8_imm(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[4:7], v0, v1, 1.0
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_fp8_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[4:7], v0, v1, 1.0
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32.i32.v4f32(i32 %A, i32 %B, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -216,21 +217,21 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8_imm_non_inlineable(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_mov_b32 s0, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s3, s0
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    s_mov_b32 s2, s0
-; GFX12-NEXT:    v_mov_b32_e32 v7, s3
-; GFX12-NEXT:    v_mov_b32_e32 v6, s2
-; GFX12-NEXT:    v_mov_b32_e32 v5, s1
-; GFX12-NEXT:    v_mov_b32_e32 v4, s0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[4:7], v0, v1, v[4:7]
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_fp8_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_mov_b32 s0, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    v_mov_b32_e32 v7, s3
+; GCN-NEXT:    v_mov_b32_e32 v6, s2
+; GCN-NEXT:    v_mov_b32_e32 v5, s1
+; GCN-NEXT:    v_mov_b32_e32 v4, s0
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[4:7], v0, v1, v[4:7]
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32.i32.v4f32(i32 %A, i32 %B, <4 x float> <float 3.0, float 3.0, float 3.0, float 3.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -238,11 +239,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8_imm(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[4:7], v0, v1, 1.0
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_fp8_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[4:7], v0, v1, 1.0
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32.i32.v4f32(i32 %A, i32 %B, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -250,21 +251,21 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8_imm_non_inlineable(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_mov_b32 s0, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s3, s0
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    s_mov_b32 s2, s0
-; GFX12-NEXT:    v_mov_b32_e32 v7, s3
-; GFX12-NEXT:    v_mov_b32_e32 v6, s2
-; GFX12-NEXT:    v_mov_b32_e32 v5, s1
-; GFX12-NEXT:    v_mov_b32_e32 v4, s0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[4:7], v0, v1, v[4:7]
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_fp8_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_mov_b32 s0, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    v_mov_b32_e32 v7, s3
+; GCN-NEXT:    v_mov_b32_e32 v6, s2
+; GCN-NEXT:    v_mov_b32_e32 v5, s1
+; GCN-NEXT:    v_mov_b32_e32 v4, s0
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[4:7], v0, v1, v[4:7]
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32.i32.v4f32(i32 %A, i32 %B, <4 x float> <float 3.0, float 3.0, float 3.0, float 3.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -272,11 +273,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8_imm(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[4:7], v0, v1, 1.0
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_bf8_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[4:7], v0, v1, 1.0
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32.i32.v4f32(i32 %A, i32 %B, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -284,21 +285,21 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8_imm_non_inlineable(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_mov_b32 s0, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s3, s0
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    s_mov_b32 s2, s0
-; GFX12-NEXT:    v_mov_b32_e32 v7, s3
-; GFX12-NEXT:    v_mov_b32_e32 v6, s2
-; GFX12-NEXT:    v_mov_b32_e32 v5, s1
-; GFX12-NEXT:    v_mov_b32_e32 v4, s0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[4:7], v0, v1, v[4:7]
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_bf8_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_mov_b32 s0, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    v_mov_b32_e32 v7, s3
+; GCN-NEXT:    v_mov_b32_e32 v6, s2
+; GCN-NEXT:    v_mov_b32_e32 v5, s1
+; GCN-NEXT:    v_mov_b32_e32 v4, s0
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[4:7], v0, v1, v[4:7]
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32.i32.v4f32(i32 %A, i32 %B, <4 x float> <float 3.0, float 3.0, float 3.0, float 3.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -306,11 +307,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8_imm(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[4:7], v0, v1, 1.0
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_bf8_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[4:7], v0, v1, 1.0
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32.i32.v4f32(i32 %A, i32 %B, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -318,21 +319,21 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8_imm_non_inlineable(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_mov_b32 s0, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s3, s0
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    s_mov_b32 s2, s0
-; GFX12-NEXT:    v_mov_b32_e32 v7, s3
-; GFX12-NEXT:    v_mov_b32_e32 v6, s2
-; GFX12-NEXT:    v_mov_b32_e32 v5, s1
-; GFX12-NEXT:    v_mov_b32_e32 v4, s0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[4:7], v0, v1, v[4:7]
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_bf8_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_mov_b32 s0, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    v_mov_b32_e32 v7, s3
+; GCN-NEXT:    v_mov_b32_e32 v6, s2
+; GCN-NEXT:    v_mov_b32_e32 v5, s1
+; GCN-NEXT:    v_mov_b32_e32 v4, s0
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[4:7], v0, v1, v[4:7]
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32.i32.v4f32(i32 %A, i32 %B, <4 x float> <float 3.0, float 3.0, float 3.0, float 3.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -340,11 +341,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4_imm(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:7], v0, v1, 1
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:7], v0, v1, 1
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32.i32.v4i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -352,21 +353,21 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4_imm_non_inlineable(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    s_movk_i32 s0, 0x80
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_mov_b32 s3, s0
-; GFX12-NEXT:    s_mov_b32 s1, s0
-; GFX12-NEXT:    s_mov_b32 s2, s0
-; GFX12-NEXT:    v_mov_b32_e32 v7, s3
-; GFX12-NEXT:    v_mov_b32_e32 v6, s2
-; GFX12-NEXT:    v_mov_b32_e32 v5, s1
-; GFX12-NEXT:    v_mov_b32_e32 v4, s0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:7], v0, v1, v[4:7]
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_movk_i32 s0, 0x80
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    s_mov_b32 s3, s0
+; GCN-NEXT:    s_mov_b32 s1, s0
+; GCN-NEXT:    s_mov_b32 s2, s0
+; GCN-NEXT:    v_mov_b32_e32 v7, s3
+; GCN-NEXT:    v_mov_b32_e32 v6, s2
+; GCN-NEXT:    v_mov_b32_e32 v5, s1
+; GCN-NEXT:    v_mov_b32_e32 v4, s0
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:7], v0, v1, v[4:7]
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32.i32.v4i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> <i32 128, i32 128, i32 128, i32 128>, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -384,3 +385,6 @@ declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32.i32.v4f32(i
 declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32.i32.v4f32(i32, i32, <4 x float>)
 declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32.i32.v4f32(i32, i32, <4 x float>)
 declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32.i32.v4i32(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 immarg)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX1170: {{.*}}
+; GFX12: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-iu-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-iu-modifiers.ll
index e47350db4003e..a87163b0dca14 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-iu-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-iu-modifiers.ll
@@ -1,12 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefix=GFX12
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1170 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GCN,GFX1170
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GCN,GFX12
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_zext_src0(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32.i32.v4i32(i1 1, i32 %A, i1 0, i32 %B, <4 x i32> %C, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -14,11 +15,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_zext_src1(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32.i32.v4i32(i1 0, i32 %A, i1 1, i32 %B, <4 x i32> %C, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -26,11 +27,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_clamp(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] clamp
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] clamp
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32.i32.v4i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> %C, i1 1)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -40,11 +41,11 @@ bb:
 
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4_zext_src0(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32.i32.v4i32(i1 1, i32 %A, i1 0, i32 %B, <4 x i32> %C, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -52,11 +53,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4_zext_src1(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32.i32.v4i32(i1 0, i32 %A, i1 1, i32 %B, <4 x i32> %C, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -64,11 +65,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4_clamp(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] clamp
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] clamp
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32.i32.v4i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> %C, i1 1)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -78,11 +79,11 @@ bb:
 
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4_zext_src0(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32.i32.v4i32(i1 1, i32 %A, i1 0, i32 %B, <4 x i32> %C, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -90,11 +91,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4_zext_src1(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32.i32.v4i32(i1 0, i32 %A, i1 1, i32 %B, <4 x i32> %C, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -102,11 +103,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4_clamp(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] clamp
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] clamp
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32.i32.v4i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> %C, i1 1)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -119,11 +120,11 @@ bb:
 
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8_zext_src0(i32 %A, <2 x i32> %B, <4 x i32> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu8_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 neg_lo:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu8_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 neg_lo:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.v4i32.i8(i1 1, i32 %A, i1 0, <2 x i32> %B, <4 x i32> %C, i8 %Index, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -131,11 +132,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8_zext_src1(i32 %A, <2 x i32> %B, <4 x i32> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu8_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 neg_lo:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu8_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 neg_lo:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.v4i32.i8(i1 0, i32 %A, i1 1, <2 x i32> %B, <4 x i32> %C, i8 %Index, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -143,11 +144,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8_clamp(i32 %A, <2 x i32> %B, <4 x i32> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu8_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 clamp
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu8_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 clamp
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.v4i32.i8(i1 0, i32 %A, i1 0, <2 x i32> %B, <4 x i32> %C, i8 %Index, i1 1)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -157,11 +158,11 @@ bb:
 
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4_zext_src0(i32 %A, i32 %B, <4 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu4_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 neg_lo:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[7:8], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu4_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 neg_lo:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[7:8], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.v4i32.i16(i1 1, i32 %A, i1 0, i32 %B, <4 x i32> %C, i16 %Index, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -169,11 +170,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4_zext_src1(i32 %A, i32 %B, <4 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu4_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 neg_lo:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[7:8], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu4_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 neg_lo:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[7:8], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.v4i32.i16(i1 0, i32 %A, i1 1, i32 %B, <4 x i32> %C, i16 %Index, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -181,11 +182,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4_clamp(i32 %A, i32 %B, <4 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu4_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 clamp
-; GFX12-NEXT:    global_store_b128 v[7:8], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu4_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 clamp
+; GCN-NEXT:    global_store_b128 v[7:8], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.v4i32.i16(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> %C, i16 %Index, i1 1)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -195,11 +196,11 @@ bb:
 
 
 define amdgpu_ps void @test_swmmac_i32_16x16x64_iu4_zext_src0(i32 %A, <2 x i32> %B, <4 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x64_iu4_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 neg_lo:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x64_iu4_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 neg_lo:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.v4i32.i16(i1 1, i32 %A, i1 0, <2 x i32> %B, <4 x i32> %C, i16 %Index, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -207,11 +208,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x64_iu4_zext_src1(i32 %A, <2 x i32> %B, <4 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x64_iu4_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 neg_lo:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x64_iu4_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 neg_lo:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.v4i32.i16(i1 0, i32 %A, i1 1, <2 x i32> %B, <4 x i32> %C, i16 %Index, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -219,11 +220,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x64_iu4_clamp(i32 %A, <2 x i32> %B, <4 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x64_iu4_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 clamp
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x64_iu4_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 clamp
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.v4i32.i16(i1 0, i32 %A, i1 0, <2 x i32> %B, <4 x i32> %C, i16 %Index, i1 1)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -236,3 +237,6 @@ declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32.i32.v4i32(i1 imma
 declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.v4i32.i8(i1 immarg, i32, i1 immarg, <2 x i32>, <4 x i32>, i8 %Index, i1 immarg)
 declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.v4i32.i16(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i16 %Index, i1 immarg)
 declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.v4i32.i16(i1 immarg, i32, i1 immarg, <2 x i32>, <4 x i32>, i16 %Index, i1 immarg)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX1170: {{.*}}
+; GFX12: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-swmmac-index_key.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-swmmac-index_key.ll
index da6852042f7f5..7d31e262b4862 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-swmmac-index_key.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-swmmac-index_key.ll
@@ -1,7 +1,35 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefix=GFX12
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1170 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GCN,GFX1170
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GCN,GFX12
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_index_key(<4 x half> %A, <8 x half> %B, <4 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_f16_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v10, v[10:11], off
+; GFX1170-NEXT:    v_mov_b32_e32 v23, v9
+; GFX1170-NEXT:    v_mov_b32_e32 v22, v8
+; GFX1170-NEXT:    v_mov_b32_e32 v21, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v20, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v27, v9
+; GFX1170-NEXT:    v_mov_b32_e32 v26, v8
+; GFX1170-NEXT:    v_mov_b32_e32 v25, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v24, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v31, v9
+; GFX1170-NEXT:    v_mov_b32_e32 v30, v8
+; GFX1170-NEXT:    v_mov_b32_e32 v29, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v28, v6
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_f16 v[20:23], v[0:1], v[2:5], v10
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_f16 v[24:27], v[0:1], v[2:5], v10 index_key:1
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_f16 v[28:31], v[0:1], v[2:5], v10 index_key:2
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 index_key:3
+; GFX1170-NEXT:    global_store_b128 v[12:13], v[20:23], off
+; GFX1170-NEXT:    global_store_b128 v[14:15], v[24:27], off
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[28:31], off
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[6:9], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v10, v[10:11], off
@@ -46,6 +74,33 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf16_index_key(<4 x i16> %A, <8 x i16> %B, <4 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_bf16_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v10, v[10:11], off
+; GFX1170-NEXT:    v_mov_b32_e32 v23, v9
+; GFX1170-NEXT:    v_mov_b32_e32 v22, v8
+; GFX1170-NEXT:    v_mov_b32_e32 v21, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v20, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v27, v9
+; GFX1170-NEXT:    v_mov_b32_e32 v26, v8
+; GFX1170-NEXT:    v_mov_b32_e32 v25, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v24, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v31, v9
+; GFX1170-NEXT:    v_mov_b32_e32 v30, v8
+; GFX1170-NEXT:    v_mov_b32_e32 v29, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v28, v6
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf16 v[20:23], v[0:1], v[2:5], v10
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf16 v[24:27], v[0:1], v[2:5], v10 index_key:1
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf16 v[28:31], v[0:1], v[2:5], v10 index_key:2
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 index_key:3
+; GFX1170-NEXT:    global_store_b128 v[12:13], v[20:23], off
+; GFX1170-NEXT:    global_store_b128 v[14:15], v[24:27], off
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[28:31], off
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[6:9], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_bf16_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v10, v[10:11], off
@@ -90,6 +145,27 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f16_16x16x32_f16_index_key(<4 x half> %A, <8 x half> %B, <4 x half> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) {
+; GFX1170-LABEL: test_swmmac_f16_16x16x32_f16_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v22, v[8:9], off
+; GFX1170-NEXT:    v_mov_b32_e32 v9, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v8, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v19, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v18, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v21, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v20, v6
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    v_swmmac_f16_16x16x32_f16 v[8:9], v[0:1], v[2:5], v22
+; GFX1170-NEXT:    v_swmmac_f16_16x16x32_f16 v[18:19], v[0:1], v[2:5], v22 index_key:1
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1170-NEXT:    v_swmmac_f16_16x16x32_f16 v[20:21], v[0:1], v[2:5], v22 index_key:2
+; GFX1170-NEXT:    v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v22 index_key:3
+; GFX1170-NEXT:    global_store_b64 v[10:11], v[8:9], off
+; GFX1170-NEXT:    global_store_b64 v[12:13], v[18:19], off
+; GFX1170-NEXT:    global_store_b64 v[14:15], v[20:21], off
+; GFX1170-NEXT:    global_store_b64 v[16:17], v[6:7], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f16_16x16x32_f16_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v22, v[8:9], off
@@ -128,6 +204,27 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_bf16_16x16x32_bf16_index_key(<4 x i16> %A, <8 x i16> %B, <4 x i16> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) {
+; GFX1170-LABEL: test_swmmac_bf16_16x16x32_bf16_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v22, v[8:9], off
+; GFX1170-NEXT:    v_mov_b32_e32 v9, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v8, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v19, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v18, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v21, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v20, v6
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    v_swmmac_bf16_16x16x32_bf16 v[8:9], v[0:1], v[2:5], v22
+; GFX1170-NEXT:    v_swmmac_bf16_16x16x32_bf16 v[18:19], v[0:1], v[2:5], v22 index_key:1
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1170-NEXT:    v_swmmac_bf16_16x16x32_bf16 v[20:21], v[0:1], v[2:5], v22 index_key:2
+; GFX1170-NEXT:    v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v22 index_key:3
+; GFX1170-NEXT:    global_store_b64 v[10:11], v[8:9], off
+; GFX1170-NEXT:    global_store_b64 v[12:13], v[18:19], off
+; GFX1170-NEXT:    global_store_b64 v[14:15], v[20:21], off
+; GFX1170-NEXT:    global_store_b64 v[16:17], v[6:7], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_bf16_16x16x32_bf16_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v22, v[8:9], off
@@ -166,6 +263,33 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8_index_key(i32 %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) {
+; GFX1170-LABEL: test_swmmac_i32_16x16x32_iu8_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v7, v[7:8], off
+; GFX1170-NEXT:    v_mov_b32_e32 v20, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v19, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v18, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v17, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v24, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v23, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v22, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v21, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v28, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v27, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v26, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v25, v3
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    v_swmmac_i32_16x16x32_iu8 v[17:20], v0, v[1:2], v7
+; GFX1170-NEXT:    v_swmmac_i32_16x16x32_iu8 v[21:24], v0, v[1:2], v7 index_key:1
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1170-NEXT:    v_swmmac_i32_16x16x32_iu8 v[25:28], v0, v[1:2], v7 index_key:2
+; GFX1170-NEXT:    v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 index_key:3
+; GFX1170-NEXT:    global_store_b128 v[9:10], v[17:20], off
+; GFX1170-NEXT:    global_store_b128 v[11:12], v[21:24], off
+; GFX1170-NEXT:    global_store_b128 v[13:14], v[25:28], off
+; GFX1170-NEXT:    global_store_b128 v[15:16], v[3:6], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_i32_16x16x32_iu8_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v7, v[7:8], off
@@ -210,6 +334,21 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4_index_key(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_i32_16x16x32_iu4_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v6, v[6:7], off
+; GFX1170-NEXT:    v_mov_b32_e32 v15, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v14, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v13, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v12, v2
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_i32_16x16x32_iu4 v[12:15], v0, v1, v6
+; GFX1170-NEXT:    v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 index_key:1
+; GFX1170-NEXT:    global_store_b128 v[8:9], v[12:15], off
+; GFX1170-NEXT:    global_store_b128 v[10:11], v[2:5], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_i32_16x16x32_iu4_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v6, v[6:7], off
@@ -236,6 +375,21 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x64_iu4_index_key(i32 %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_i32_16x16x64_iu4_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v7, v[7:8], off
+; GFX1170-NEXT:    v_mov_b32_e32 v16, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v15, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v14, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v13, v3
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_i32_16x16x64_iu4 v[13:16], v0, v[1:2], v7
+; GFX1170-NEXT:    v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 index_key:1
+; GFX1170-NEXT:    global_store_b128 v[9:10], v[13:16], off
+; GFX1170-NEXT:    global_store_b128 v[11:12], v[3:6], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_i32_16x16x64_iu4_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v7, v[7:8], off
@@ -262,6 +416,33 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_fp8_index_key(i32 %A, <2 x i32> %B, <4 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_fp8_fp8_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v7, v[7:8], off
+; GFX1170-NEXT:    v_mov_b32_e32 v20, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v19, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v18, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v17, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v24, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v23, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v22, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v21, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v28, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v27, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v26, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v25, v3
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_fp8 v[17:20], v0, v[1:2], v7
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_fp8 v[21:24], v0, v[1:2], v7 index_key:1
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_fp8 v[25:28], v0, v[1:2], v7 index_key:2
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 index_key:3
+; GFX1170-NEXT:    global_store_b128 v[9:10], v[17:20], off
+; GFX1170-NEXT:    global_store_b128 v[11:12], v[21:24], off
+; GFX1170-NEXT:    global_store_b128 v[13:14], v[25:28], off
+; GFX1170-NEXT:    global_store_b128 v[15:16], v[3:6], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_fp8_fp8_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v7, v[7:8], off
@@ -306,6 +487,33 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_bf8_index_key(i32 %A, <2 x i32> %B, <4 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_fp8_bf8_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v7, v[7:8], off
+; GFX1170-NEXT:    v_mov_b32_e32 v20, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v19, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v18, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v17, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v24, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v23, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v22, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v21, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v28, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v27, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v26, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v25, v3
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_bf8 v[17:20], v0, v[1:2], v7
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_bf8 v[21:24], v0, v[1:2], v7 index_key:1
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_bf8 v[25:28], v0, v[1:2], v7 index_key:2
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 index_key:3
+; GFX1170-NEXT:    global_store_b128 v[9:10], v[17:20], off
+; GFX1170-NEXT:    global_store_b128 v[11:12], v[21:24], off
+; GFX1170-NEXT:    global_store_b128 v[13:14], v[25:28], off
+; GFX1170-NEXT:    global_store_b128 v[15:16], v[3:6], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_fp8_bf8_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v7, v[7:8], off
@@ -350,6 +558,33 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_fp8_index_key(i32 %A, <2 x i32> %B, <4 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_bf8_fp8_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v7, v[7:8], off
+; GFX1170-NEXT:    v_mov_b32_e32 v20, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v19, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v18, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v17, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v24, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v23, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v22, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v21, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v28, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v27, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v26, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v25, v3
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_fp8 v[17:20], v0, v[1:2], v7
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_fp8 v[21:24], v0, v[1:2], v7 index_key:1
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_fp8 v[25:28], v0, v[1:2], v7 index_key:2
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 index_key:3
+; GFX1170-NEXT:    global_store_b128 v[9:10], v[17:20], off
+; GFX1170-NEXT:    global_store_b128 v[11:12], v[21:24], off
+; GFX1170-NEXT:    global_store_b128 v[13:14], v[25:28], off
+; GFX1170-NEXT:    global_store_b128 v[15:16], v[3:6], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_bf8_fp8_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v7, v[7:8], off
@@ -394,6 +629,33 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_bf8_index_key(i32 %A, <2 x i32> %B, <4 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_bf8_bf8_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v7, v[7:8], off
+; GFX1170-NEXT:    v_mov_b32_e32 v20, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v19, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v18, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v17, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v24, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v23, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v22, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v21, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v28, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v27, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v26, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v25, v3
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_bf8 v[17:20], v0, v[1:2], v7
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_bf8 v[21:24], v0, v[1:2], v7 index_key:1
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_bf8 v[25:28], v0, v[1:2], v7 index_key:2
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 index_key:3
+; GFX1170-NEXT:    global_store_b128 v[9:10], v[17:20], off
+; GFX1170-NEXT:    global_store_b128 v[11:12], v[21:24], off
+; GFX1170-NEXT:    global_store_b128 v[13:14], v[25:28], off
+; GFX1170-NEXT:    global_store_b128 v[15:16], v[3:6], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_bf8_bf8_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v7, v[7:8], off
@@ -448,3 +710,5 @@ declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v4f32.i32.v2i32.v4f
 declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v4f32.i32.v2i32.v4f32.i8(i32, <2 x i32>, <4 x float>, i8)
 declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v4f32.i32.v2i32.v4f32.i8(i32, <2 x i32>, <4 x float>, i8)
 declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v4f32.i32.v2i32.v4f32.i8(i32, <2 x i32>, <4 x float>, i8)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64.ll
index 957b7b1b2c77c..bb256883c29ae 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64.ll
@@ -1,12 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefix=GFX12
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1170 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GCN,GFX1170
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GCN,GFX12
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7]
+; GCN-NEXT:    global_store_b128 v[8:9], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float>@llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16.v4f16.v4f32(<4 x half> %A, <4 x half> %B, <4 x float> %C)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -14,11 +15,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<4 x i16> %A, <4 x i16> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7]
+; GCN-NEXT:    global_store_b128 v[8:9], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4i16.v4i16.v4f32(<4 x i16> %A, <4 x i16> %B, <4 x float> %C)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -26,11 +27,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16(<4 x half> %A, <4 x half> %B, <4 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5]
-; GFX12-NEXT:    global_store_b64 v[6:7], v[4:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5]
+; GCN-NEXT:    global_store_b64 v[6:7], v[4:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v4f16.v4f16.v4f16.v4f16(<4 x half> %A, <4 x half> %B, <4 x half> %C, i1 0)
   store <4 x half> %res, ptr addrspace(1) %out
@@ -38,11 +39,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_bf16_16x16x16_bf16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5]
-; GFX12-NEXT:    global_store_b64 v[6:7], v[4:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_bf16_16x16x16_bf16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5]
+; GCN-NEXT:    global_store_b64 v[6:7], v[4:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v4i16.v4i16.v4i16.v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i1 0)
   store <4 x i16> %res, ptr addrspace(1) %out
@@ -50,11 +51,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32.i32.v4i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> %C, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -62,11 +63,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32.i32.v4i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> %C, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -74,11 +75,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_fp8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32.i32.v4f32(i32 %A, i32 %B, <4 x float> %C)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -86,11 +87,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_fp8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32.i32.v4f32(i32 %A, i32 %B, <4 x float> %C)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -98,11 +99,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_bf8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32.i32.v4f32(i32 %A, i32 %B, <4 x float> %C)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -110,11 +111,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_bf8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32.i32.v4f32(i32 %A, i32 %B, <4 x float> %C)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -122,11 +123,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32.i32.v4i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> %C, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -134,11 +135,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16(<4 x half> %A, <8 x half> %B, <4 x float> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_f16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10
-; GFX12-NEXT:    global_store_b128 v[11:12], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_f16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10
+; GCN-NEXT:    global_store_b128 v[11:12], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.v4f32.i8(<4 x half> %A, <8 x half> %B, <4 x float> %C, i8 %Index)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -146,11 +147,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf16(<4 x i16> %A, <8 x i16> %B, <4 x float> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_bf16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10
-; GFX12-NEXT:    global_store_b128 v[11:12], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_bf16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10
+; GCN-NEXT:    global_store_b128 v[11:12], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v4f32.v4i16.v8i16.v4f32.i8(<4 x i16> %A, <8 x i16> %B, <4 x float> %C, i8 %Index)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -158,11 +159,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f16_16x16x32_f16(<4 x half> %A, <8 x half> %B, <4 x half> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f16_16x16x32_f16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8
-; GFX12-NEXT:    global_store_b64 v[9:10], v[6:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f16_16x16x32_f16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8
+; GCN-NEXT:    global_store_b64 v[9:10], v[6:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.v4f16.i8(<4 x half> %A, <8 x half> %B, <4 x half> %C, i8 %Index)
   store <4 x half> %res, ptr addrspace(1) %out
@@ -170,11 +171,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_bf16_16x16x32_bf16(<4 x i16> %A, <8 x i16> %B, <4 x i16> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_bf16_16x16x32_bf16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8
-; GFX12-NEXT:    global_store_b64 v[9:10], v[6:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_bf16_16x16x32_bf16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8
+; GCN-NEXT:    global_store_b64 v[9:10], v[6:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v4i16.v4i16.v8i16.v4i16.i8(<4 x i16> %A, <8 x i16> %B, <4 x i16> %C, i8 %Index)
   store <4 x i16> %res, ptr addrspace(1) %out
@@ -182,11 +183,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8(i32 %A, <2 x i32> %B, <4 x i32> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.v4i32.i8(i1 0, i32 %A, i1 0, <2 x i32> %B, <4 x i32> %C, i8 %Index, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -194,11 +195,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4(i32 %A, i32 %B, <4 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu4:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6
-; GFX12-NEXT:    global_store_b128 v[7:8], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu4:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6
+; GCN-NEXT:    global_store_b128 v[7:8], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.v4i32.i16(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> %C, i16 %Index, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -206,11 +207,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x64_iu4(i32 %A, <2 x i32> %B, <4 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x64_iu4:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x64_iu4:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.v4i32.i16(i1 0, i32 %A, i1 0, <2 x i32> %B, <4 x i32> %C, i16 %Index, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -218,11 +219,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_fp8(i32 %A, <2 x i32> %B, <4 x float> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_fp8_fp8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_fp8_fp8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v4f32.i32.v2i32.v4f32.i8(i32 %A, <2 x i32> %B, <4 x float> %C, i8 %Index)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -230,11 +231,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_bf8(i32 %A, <2 x i32> %B, <4 x float> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_fp8_bf8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_fp8_bf8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v4f32.i32.v2i32.v4f32.i8(i32 %A, <2 x i32> %B, <4 x float> %C, i8 %Index)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -242,11 +243,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_fp8(i32 %A, <2 x i32> %B, <4 x float> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_bf8_fp8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_bf8_fp8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v4f32.i32.v2i32.v4f32.i8(i32 %A, <2 x i32> %B, <4 x float> %C, i8 %Index)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -254,11 +255,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_bf8(i32 %A, <2 x i32> %B, <4 x float> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_bf8_bf8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_bf8_bf8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v4f32.i32.v2i32.v4f32.i8(i32 %A, <2 x i32> %B, <4 x float> %C, i8 %Index)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -287,3 +288,6 @@ declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v4f32.i32.v2i32.v4f
 declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v4f32.i32.v2i32.v4f32.i8(i32, <2 x i32>, <4 x float>, i8)
 declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v4f32.i32.v2i32.v4f32.i8(i32, <2 x i32>, <4 x float>, i8)
 declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v4f32.i32.v2i32.v4f32.i8(i32, <2 x i32>, <4 x float>, i8)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX1170: {{.*}}
+; GFX12: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
index 4a010071d58c8..bc5c3283fb49e 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
@@ -1,14 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s --check-prefix=GFX12
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 < %s | FileCheck %s --check-prefixes=GCN,GFX1170
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s --check-prefixes=GCN,GFX12
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negA(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negA:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
-; GFX12-NEXT:    global_store_b128 v[16:17], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_negA:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
+; GCN-NEXT:    global_store_b128 v[16:17], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.A = fneg <8 x half> %A
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half> %fneg.A, <8 x half> %B, <8 x float> %C)
@@ -17,13 +18,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negB(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negB:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
-; GFX12-NEXT:    global_store_b128 v[16:17], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_negB:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
+; GCN-NEXT:    global_store_b128 v[16:17], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.B = fneg <8 x half> %B
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half> %A, <8 x half> %fneg.B, <8 x float> %C)
@@ -32,13 +33,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negC(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
-; GFX12-NEXT:    global_store_b128 v[16:17], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
+; GCN-NEXT:    global_store_b128 v[16:17], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <8 x float> %C
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half> %A, <8 x half> %B, <8 x float> %fneg.C)
@@ -47,13 +48,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_absC(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
-; GFX12-NEXT:    global_store_b128 v[16:17], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
+; GCN-NEXT:    global_store_b128 v[16:17], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <8 x float> @llvm.fabs.v8f32(<8 x float> %C)
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half> %A, <8 x half> %B, <8 x float> %fabs.C)
@@ -62,13 +63,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf16_negC(<8 x i16> %A, <8 x i16> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf16_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
-; GFX12-NEXT:    global_store_b128 v[16:17], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf16_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
+; GCN-NEXT:    global_store_b128 v[16:17], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <8 x float> %C
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16(<8 x i16> %A, <8 x i16> %B, <8 x float> %fneg.C)
@@ -77,13 +78,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf16_absC(<8 x i16> %A, <8 x i16> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf16_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
-; GFX12-NEXT:    global_store_b128 v[16:17], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf16_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
+; GCN-NEXT:    global_store_b128 v[16:17], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <8 x float> @llvm.fabs.v8f32(<8 x float> %C)
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16(<8 x i16> %A, <8 x i16> %B, <8 x float> %fabs.C)
@@ -92,11 +93,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negA(<8 x half> %A, <8 x half> %B, <8 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negA:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_negA:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.A = fneg <8 x half> %A
   %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %fneg.A, <8 x half> %B, <8 x half> %C, i1 0)
@@ -105,11 +106,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negB(<8 x half> %A, <8 x half> %B, <8 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negB:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_negB:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.B = fneg <8 x half> %B
   %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %fneg.B, <8 x half> %C, i1 0)
@@ -118,11 +119,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC(<8 x half> %A, <8 x half> %B, <8 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <8 x half> %C
   %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> %fneg.C, i1 0)
@@ -131,11 +132,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_absC(<8 x half> %A, <8 x half> %B, <8 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <8 x half> @llvm.fabs.v8f16(<8 x half> %C)
   %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> %fabs.C, i1 0)
@@ -144,13 +145,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8_negC(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_fp8_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <8 x float> %C
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> %fneg.C)
@@ -159,13 +160,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8_absC(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_fp8_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <8 x float> @llvm.fabs.v8f32(<8 x float> %C)
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> %fabs.C)
@@ -174,13 +175,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8_negC(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_fp8_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <8 x float> %C
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> %fneg.C)
@@ -189,13 +190,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8_absC(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_fp8_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <8 x float> @llvm.fabs.v8f32(<8 x float> %C)
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> %fabs.C)
@@ -204,13 +205,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8_negC(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_bf8_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <8 x float> %C
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> %fneg.C)
@@ -219,13 +220,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8_absC(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_bf8_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <8 x float> @llvm.fabs.v8f32(<8 x float> %C)
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> %fabs.C)
@@ -234,13 +235,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8_negC(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_bf8_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <8 x float> %C
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> %fneg.C)
@@ -249,13 +250,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8_absC(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_bf8_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <8 x float> @llvm.fabs.v8f32(<8 x float> %C)
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> %fabs.C)
@@ -264,13 +265,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_negA(<8 x half> %A, <16 x half> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_negA:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[21:22], v[16:19], off offset:16
-; GFX12-NEXT:    global_store_b128 v[21:22], v[12:15], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_f16_negA:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[21:22], v[16:19], off offset:16
+; GCN-NEXT:    global_store_b128 v[21:22], v[12:15], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.A = fneg <8 x half> %A
   %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.i16(<8 x half> %fneg.A, <16 x half> %B, <8 x float> %C, i16 %Index)
@@ -279,13 +280,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_negB(<8 x half> %A, <16 x half> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_negB:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[21:22], v[16:19], off offset:16
-; GFX12-NEXT:    global_store_b128 v[21:22], v[12:15], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_f16_negB:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[21:22], v[16:19], off offset:16
+; GCN-NEXT:    global_store_b128 v[21:22], v[12:15], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.B = fneg <16 x half> %B
   %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.i16(<8 x half> %A, <16 x half> %fneg.B, <8 x float> %C, i16 %Index)
@@ -294,11 +295,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f16_16x16x32_f16_negA(<8 x half> %A, <16 x half> %B, <8 x half> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f16_16x16x32_f16_negA:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[17:18], v[12:15], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f16_16x16x32_f16_negA:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[17:18], v[12:15], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.A = fneg <8 x half> %A
   %res = call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.i16(<8 x half> %fneg.A, <16 x half> %B, <8 x half> %C, i16 %Index)
@@ -307,11 +308,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f16_16x16x32_f16_negB(<8 x half> %A, <16 x half> %B, <8 x half> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f16_16x16x32_f16_negB:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[17:18], v[12:15], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f16_16x16x32_f16_negB:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[17:18], v[12:15], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.B = fneg <16 x half> %B
   %res = call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.i16(<8 x half> %A, <16 x half> %fneg.B, <8 x half> %C, i16 %Index)
@@ -322,13 +323,13 @@ bb:
 ; both neg and abs patterns (wmma matrix C f32 or f16 )
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negabsC(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negabsC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
-; GFX12-NEXT:    global_store_b128 v[16:17], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_negabsC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
+; GCN-NEXT:    global_store_b128 v[16:17], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <8 x float> @llvm.fabs.v8f32(<8 x float> %C)
   %fneg.fabs.C = fneg <8 x float> %fabs.C
@@ -338,11 +339,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negabsC(<8 x half> %A, <8 x half> %B, <8 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negabsC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_negabsC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <8 x half> @llvm.fabs.v8f16(<8 x half> %C)
   %fneg.fabs.C = fneg <8 x half> %fabs.C
@@ -352,15 +353,15 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_neg_partial_fabsA(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_neg_partial_fabsA:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_and_b32_e32 v11, 0x7fffffff, v11
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
-; GFX12-NEXT:    global_store_b128 v[16:17], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_neg_partial_fabsA:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_and_b32_e32 v11, 0x7fffffff, v11
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
+; GCN-NEXT:    global_store_b128 v[16:17], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %el3 = extractelement <8 x float> %C, i32 3
   %el3.fabs = call float @llvm.fabs.f32(float %el3)
@@ -374,13 +375,13 @@ bb:
 ; A or B matrix modifier and constant in C
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negA_constantC(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negA_constantC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[10:17], v[0:3], v[4:7], 1.0 neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
-; GFX12-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_negA_constantC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[10:17], v[0:3], v[4:7], 1.0 neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
+; GCN-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.A = fneg <8 x half> %A
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half> %fneg.A, <8 x half> %B, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>)
@@ -389,11 +390,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negB_constantC(<8 x half> %A, <8 x half> %B, <8 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negB_constantC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[10:13], v[0:3], v[4:7], 1.0 neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_negB_constantC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[10:13], v[0:3], v[4:7], 1.0 neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.B = fneg <8 x half> %B
   %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %fneg.B, <8 x half> <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>, i1 0)
@@ -404,6 +405,24 @@ bb:
 ; pack f16 elements with v_perm_b32 since they don't come from same b32
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<8 x half> %A, <8 x half> %B, ptr %Caddr, ptr addrspace(1) %out) {
+; GFX1170-LABEL: test_wmma_f16_16x16x16_f16_negC_pack:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    flat_load_b128 v[12:15], v[8:9] offset:16
+; GFX1170-NEXT:    flat_load_b128 v[16:19], v[8:9]
+; GFX1170-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
+; GFX1170-NEXT:    v_mov_b16_e32 v8.l, v15.l
+; GFX1170-NEXT:    v_mov_b16_e32 v9.l, v14.l
+; GFX1170-NEXT:    v_perm_b32 v14, v13, v12, 0x5040100
+; GFX1170-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX1170-NEXT:    v_perm_b32 v13, v19, v18, 0x5040100
+; GFX1170-NEXT:    v_perm_b32 v12, v17, v16, 0x5040100
+; GFX1170-NEXT:    v_perm_b32 v15, v8, v9, 0x5040100
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_wmma_f16_16x16x16_f16 v[12:15], v[0:3], v[4:7], v[12:15] neg_lo:[0,0,1]
+; GFX1170-NEXT:    global_store_b128 v[10:11], v[12:15], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negC_pack:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    s_clause 0x1
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll
index 1b44e8f01c0f9..2558dc3903640 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll
@@ -1,14 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s --check-prefix=GFX12
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 < %s | FileCheck %s --check-prefixes=GCN,GFX1170
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s --check-prefixes=GCN,GFX12
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_imm(<8 x half> %A, <8 x half> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[10:17], v[0:3], v[4:7], 1.0
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
-; GFX12-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[10:17], v[0:3], v[4:7], 1.0
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
+; GCN-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half> %A, <8 x half> %B, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -16,6 +17,24 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_imm_non_inlineable(<8 x half> %A, <8 x half> %B, ptr addrspace(1) %out) {
+; GFX1170-LABEL: test_wmma_f32_16x16x16_f16_imm_non_inlineable:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    v_mov_b32_e32 v10, 0x40400000
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_mov_b32_e32 v11, v10
+; GFX1170-NEXT:    v_mov_b32_e32 v12, v10
+; GFX1170-NEXT:    v_mov_b32_e32 v13, v10
+; GFX1170-NEXT:    v_mov_b32_e32 v14, v10
+; GFX1170-NEXT:    v_mov_b32_e32 v15, v10
+; GFX1170-NEXT:    v_mov_b32_e32 v16, v10
+; GFX1170-NEXT:    v_mov_b32_e32 v17, v10
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_wmma_f32_16x16x16_f16 v[10:17], v[0:3], v[4:7], v[10:17]
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_wmma_f32_16x16x16_f16_imm_non_inlineable:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    v_mov_b32_e32 v10, 0x40400000
@@ -36,13 +55,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf16_imm(<8 x i16> %A, <8 x i16> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf16_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf16 v[10:17], v[0:3], v[4:7], 1.0
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
-; GFX12-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf16_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf16 v[10:17], v[0:3], v[4:7], 1.0
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
+; GCN-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16(<8 x i16> %A, <8 x i16> %B, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -50,6 +69,24 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf16_imm_non_inlineable(<8 x i16> %A, <8 x i16> %B, ptr addrspace(1) %out) {
+; GFX1170-LABEL: test_wmma_f32_16x16x16_bf16_imm_non_inlineable:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    v_mov_b32_e32 v10, 0x40400000
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_mov_b32_e32 v11, v10
+; GFX1170-NEXT:    v_mov_b32_e32 v12, v10
+; GFX1170-NEXT:    v_mov_b32_e32 v13, v10
+; GFX1170-NEXT:    v_mov_b32_e32 v14, v10
+; GFX1170-NEXT:    v_mov_b32_e32 v15, v10
+; GFX1170-NEXT:    v_mov_b32_e32 v16, v10
+; GFX1170-NEXT:    v_mov_b32_e32 v17, v10
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_wmma_f32_16x16x16_bf16 v[10:17], v[0:3], v[4:7], v[10:17]
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_wmma_f32_16x16x16_bf16_imm_non_inlineable:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    v_mov_b32_e32 v10, 0x40400000
@@ -70,11 +107,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_imm(<8 x half> %A, <8 x half> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[10:13], v[0:3], v[4:7], 1.0
-; GFX12-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[10:13], v[0:3], v[4:7], 1.0
+; GCN-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>, i1 0)
   store <8 x half> %res, ptr addrspace(1) %out
@@ -82,6 +119,17 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_imm_non_inlineable(<8 x half> %A, <8 x half> %B, ptr addrspace(1) %out) {
+; GFX1170-LABEL: test_wmma_f16_16x16x16_f16_imm_non_inlineable:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    v_mov_b32_e32 v10, 0x42004200
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1170-NEXT:    v_mov_b32_e32 v11, v10
+; GFX1170-NEXT:    v_mov_b32_e32 v12, v10
+; GFX1170-NEXT:    v_mov_b32_e32 v13, v10
+; GFX1170-NEXT:    v_wmma_f16_16x16x16_f16 v[10:13], v[0:3], v[4:7], v[10:13]
+; GFX1170-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_wmma_f16_16x16x16_f16_imm_non_inlineable:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    v_mov_b32_e32 v10, 0x42004200
@@ -98,6 +146,17 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_imm(<8 x i16> %A, <8 x i16> %B, ptr addrspace(1) %out) {
+; GFX1170-LABEL: test_wmma_bf16_16x16x16_bf16_imm:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    v_mov_b32_e32 v10, 0x3f803f80
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1170-NEXT:    v_mov_b32_e32 v11, v10
+; GFX1170-NEXT:    v_mov_b32_e32 v12, v10
+; GFX1170-NEXT:    v_mov_b32_e32 v13, v10
+; GFX1170-NEXT:    v_wmma_bf16_16x16x16_bf16 v[10:13], v[0:3], v[4:7], v[10:13]
+; GFX1170-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_wmma_bf16_16x16x16_bf16_imm:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    v_mov_b32_e32 v10, 0x3f803f80
@@ -114,6 +173,17 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_imm_non_inlineable(<8 x i16> %A, <8 x i16> %B, ptr addrspace(1) %out) {
+; GFX1170-LABEL: test_wmma_bf16_16x16x16_bf16_imm_non_inlineable:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    v_mov_b32_e32 v10, 0x3fc03fc0
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1170-NEXT:    v_mov_b32_e32 v11, v10
+; GFX1170-NEXT:    v_mov_b32_e32 v12, v10
+; GFX1170-NEXT:    v_mov_b32_e32 v13, v10
+; GFX1170-NEXT:    v_wmma_bf16_16x16x16_bf16 v[10:13], v[0:3], v[4:7], v[10:13]
+; GFX1170-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_wmma_bf16_16x16x16_bf16_imm_non_inlineable:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    v_mov_b32_e32 v10, 0x3fc03fc0
@@ -130,13 +200,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_imm(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[6:13], v[0:1], v[2:3], 1
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[6:13], v[0:1], v[2:3], 1
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -144,6 +214,24 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_imm_non_inlineable(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
+; GFX1170-LABEL: test_wmma_i32_16x16x16_iu8_imm_non_inlineable:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    v_mov_b32_e32 v6, 0x80
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_mov_b32_e32 v7, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v8, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v9, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v10, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v11, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v12, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v13, v6
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_wmma_i32_16x16x16_iu8 v[6:13], v[0:1], v[2:3], v[6:13]
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_imm_non_inlineable:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    v_mov_b32_e32 v6, 0x80
@@ -164,13 +252,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4_imm(i32 %A, i32 %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[4:11], v0, v1, 1
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[2:3], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[4:11], v0, v1, 1
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[2:3], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32(i1 0, i32 %A, i1 0, i32 %B, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -178,6 +266,24 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4_imm_non_inlineable(i32 %A, i32 %B, ptr addrspace(1) %out) {
+; GFX1170-LABEL: test_wmma_i32_16x16x16_iu4_imm_non_inlineable:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    v_mov_b32_e32 v4, 0x80
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_mov_b32_e32 v5, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v6, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v7, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v8, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v9, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v10, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v11, v4
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_wmma_i32_16x16x16_iu4 v[4:11], v0, v1, v[4:11]
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_wmma_i32_16x16x16_iu4_imm_non_inlineable:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    v_mov_b32_e32 v4, 0x80
@@ -198,13 +304,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8_imm(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[6:13], v[0:1], v[2:3], 1.0
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_fp8_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[6:13], v[0:1], v[2:3], 1.0
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -212,6 +318,24 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8_imm_non_inlineable(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
+; GFX1170-LABEL: test_wmma_f32_16x16x16_fp8_fp8_imm_non_inlineable:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    v_mov_b32_e32 v6, 0x40400000
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_mov_b32_e32 v7, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v8, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v9, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v10, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v11, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v12, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v13, v6
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[6:13], v[0:1], v[2:3], v[6:13]
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8_imm_non_inlineable:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    v_mov_b32_e32 v6, 0x40400000
@@ -232,13 +356,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8_imm(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[6:13], v[0:1], v[2:3], 1.0
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_fp8_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[6:13], v[0:1], v[2:3], 1.0
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -246,6 +370,24 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8_imm_non_inlineable(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
+; GFX1170-LABEL: test_wmma_f32_16x16x16_bf8_fp8_imm_non_inlineable:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    v_mov_b32_e32 v6, 0x40400000
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_mov_b32_e32 v7, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v8, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v9, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v10, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v11, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v12, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v13, v6
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[6:13], v[0:1], v[2:3], v[6:13]
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8_imm_non_inlineable:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    v_mov_b32_e32 v6, 0x40400000
@@ -266,13 +408,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8_imm(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[6:13], v[0:1], v[2:3], 1.0
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_bf8_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[6:13], v[0:1], v[2:3], 1.0
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -280,6 +422,24 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8_imm_non_inlineable(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
+; GFX1170-LABEL: test_wmma_f32_16x16x16_fp8_bf8_imm_non_inlineable:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    v_mov_b32_e32 v6, 0x40400000
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_mov_b32_e32 v7, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v8, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v9, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v10, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v11, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v12, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v13, v6
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[6:13], v[0:1], v[2:3], v[6:13]
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8_imm_non_inlineable:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    v_mov_b32_e32 v6, 0x40400000
@@ -300,13 +460,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8_imm(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[6:13], v[0:1], v[2:3], 1.0
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_bf8_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[6:13], v[0:1], v[2:3], 1.0
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -314,6 +474,24 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8_imm_non_inlineable(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
+; GFX1170-LABEL: test_wmma_f32_16x16x16_bf8_bf8_imm_non_inlineable:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    v_mov_b32_e32 v6, 0x40400000
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_mov_b32_e32 v7, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v8, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v9, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v10, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v11, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v12, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v13, v6
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[6:13], v[0:1], v[2:3], v[6:13]
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8_imm_non_inlineable:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    v_mov_b32_e32 v6, 0x40400000
@@ -334,13 +512,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4_imm(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[6:13], v[0:1], v[2:3], 1
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[6:13], v[0:1], v[2:3], 1
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -348,6 +526,24 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4_imm_non_inlineable(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
+; GFX1170-LABEL: test_wmma_i32_16x16x32_iu4_imm_non_inlineable:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    v_mov_b32_e32 v6, 0x80
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_mov_b32_e32 v7, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v8, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v9, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v10, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v11, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v12, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v13, v6
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_wmma_i32_16x16x32_iu4 v[6:13], v[0:1], v[2:3], v[6:13]
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_wmma_i32_16x16x32_iu4_imm_non_inlineable:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    v_mov_b32_e32 v6, 0x80
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-iu-modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-iu-modifiers.ll
index 945305848b3e1..9d8f26ea11cb8 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-iu-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-iu-modifiers.ll
@@ -1,14 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s --check-prefix=GFX12
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 < %s | FileCheck %s --check-prefixes=GCN,GFX1170
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s --check-prefixes=GCN,GFX12
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_zext_src0(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32(i1 1, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -16,13 +17,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_zext_src1(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32(i1 0, <2 x i32> %A, i1 1, <2 x i32> %B, <8 x i32> %C, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -30,13 +31,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] clamp
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] clamp
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 1)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -46,13 +47,13 @@ bb:
 
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4_zext_src0(i32 %A, i32 %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] neg_lo:[1,0,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[10:11], v[6:9], off offset:16
-; GFX12-NEXT:    global_store_b128 v[10:11], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] neg_lo:[1,0,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[10:11], v[6:9], off offset:16
+; GCN-NEXT:    global_store_b128 v[10:11], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32(i1 1, i32 %A, i1 0, i32 %B, <8 x i32> %C, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -60,13 +61,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4_zext_src1(i32 %A, i32 %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] neg_lo:[0,1,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[10:11], v[6:9], off offset:16
-; GFX12-NEXT:    global_store_b128 v[10:11], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] neg_lo:[0,1,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[10:11], v[6:9], off offset:16
+; GCN-NEXT:    global_store_b128 v[10:11], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32(i1 0, i32 %A, i1 1, i32 %B, <8 x i32> %C, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -74,13 +75,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4_clamp(i32 %A, i32 %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] clamp
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[10:11], v[6:9], off offset:16
-; GFX12-NEXT:    global_store_b128 v[10:11], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] clamp
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[10:11], v[6:9], off offset:16
+; GCN-NEXT:    global_store_b128 v[10:11], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32(i1 0, i32 %A, i1 0, i32 %B, <8 x i32> %C, i1 1)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -90,13 +91,13 @@ bb:
 
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4_zext_src0(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 1, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -104,13 +105,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4_zext_src1(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 0, <2 x i32> %A, i1 1, <2 x i32> %B, <8 x i32> %C, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -118,13 +119,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] clamp
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] clamp
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 1)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -136,13 +137,13 @@ bb:
 
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8_zext_src0(<2 x i32> %A, <4 x i32> %B, <8 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu8_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[1,0,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu8_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[1,0,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.i16(i1 1, <2 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i16 %Index, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -150,13 +151,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8_zext_src1(<2 x i32> %A, <4 x i32> %B, <8 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu8_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,1,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu8_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,1,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.i16(i1 0, <2 x i32> %A, i1 1, <4 x i32> %B, <8 x i32> %C, i16 %Index, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -164,13 +165,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8_clamp(<2 x i32> %A, <4 x i32> %B, <8 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu8_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 clamp
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu8_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 clamp
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.i16(i1 0, <2 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i16 %Index, i1 1)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -180,13 +181,13 @@ bb:
 
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4_zext_src0(i32 %A, <2 x i32> %B, <8 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu4_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 neg_lo:[1,0,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[7:10], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu4_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 neg_lo:[1,0,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[7:10], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.i16(i1 1, i32 %A, i1 0, <2 x i32> %B, <8 x i32> %C, i16 %Index, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -194,13 +195,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4_zext_src1(i32 %A, <2 x i32> %B, <8 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu4_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 neg_lo:[0,1,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[7:10], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu4_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 neg_lo:[0,1,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[7:10], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.i16(i1 0, i32 %A, i1 1, <2 x i32> %B, <8 x i32> %C, i16 %Index, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -208,13 +209,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4_clamp(i32 %A, <2 x i32> %B, <8 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu4_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 clamp
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[7:10], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu4_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 clamp
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[7:10], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.i16(i1 0, i32 %A, i1 0, <2 x i32> %B, <8 x i32> %C, i16 %Index, i1 1)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -224,13 +225,13 @@ bb:
 
 
 define amdgpu_ps void @test_swmmac_i32_16x16x64_iu4_zext_src0(<2 x i32> %A, <4 x i32> %B, <8 x i32> %C, i32 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x64_iu4_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 neg_lo:[1,0,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x64_iu4_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 neg_lo:[1,0,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.i32(i1 1, <2 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i32 %Index, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -238,13 +239,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x64_iu4_zext_src1(<2 x i32> %A, <4 x i32> %B, <8 x i32> %C, i32 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x64_iu4_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,1,0]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x64_iu4_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,1,0]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.i32(i1 0, <2 x i32> %A, i1 1, <4 x i32> %B, <8 x i32> %C, i32 %Index, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -252,13 +253,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x64_iu4_clamp(<2 x i32> %A, <4 x i32> %B, <8 x i32> %C, i32 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x64_iu4_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 clamp
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x64_iu4_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 clamp
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.i32(i1 0, <2 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i32 %Index, i1 1)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -271,3 +272,6 @@ declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 immarg, <2 x
 declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.i16(i1 immarg, <2 x i32>, i1 immarg, <4 x i32>, <8 x i32>, i16 %Index, i1 immarg)
 declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.i16(i1 immarg, i32, i1 immarg, <2 x i32>, <8 x i32>, i16 %Index, i1 immarg)
 declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.i32(i1 immarg, <2 x i32>, i1 immarg, <4 x i32>, <8 x i32>, i32 %Index, i1 immarg)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX1170: {{.*}}
+; GFX12: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-swmmac-index_key.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-swmmac-index_key.ll
index cd7edc21718c9..f7dd2d189a2b2 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-swmmac-index_key.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-swmmac-index_key.ll
@@ -1,7 +1,27 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s --check-prefix=GFX12
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 < %s | FileCheck %s --check-prefixes=GCN,GFX1170
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s --check-prefixes=GCN,GFX12
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_index_key(<8 x half> %A, <16 x half> %B, <8 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_f16_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v20, v[20:21], off
+; GFX1170-NEXT:    v_dual_mov_b32 v33, v19 :: v_dual_mov_b32 v32, v18
+; GFX1170-NEXT:    v_dual_mov_b32 v31, v17 :: v_dual_mov_b32 v30, v16
+; GFX1170-NEXT:    v_dual_mov_b32 v29, v15 :: v_dual_mov_b32 v28, v14
+; GFX1170-NEXT:    v_dual_mov_b32 v27, v13 :: v_dual_mov_b32 v26, v12
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_f16 v[26:33], v[0:3], v[4:11], v20
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 index_key:1
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[22:23], v[30:33], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[22:23], v[26:29], off
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[24:25], v[16:19], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[24:25], v[12:15], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v20, v[20:21], off
@@ -32,6 +52,25 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf16_index_key(<8 x i16> %A, <16 x i16> %B, <8 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_bf16_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v20, v[20:21], off
+; GFX1170-NEXT:    v_dual_mov_b32 v33, v19 :: v_dual_mov_b32 v32, v18
+; GFX1170-NEXT:    v_dual_mov_b32 v31, v17 :: v_dual_mov_b32 v30, v16
+; GFX1170-NEXT:    v_dual_mov_b32 v29, v15 :: v_dual_mov_b32 v28, v14
+; GFX1170-NEXT:    v_dual_mov_b32 v27, v13 :: v_dual_mov_b32 v26, v12
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf16 v[26:33], v[0:3], v[4:11], v20
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 index_key:1
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[22:23], v[30:33], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[22:23], v[26:29], off
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[24:25], v[16:19], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[24:25], v[12:15], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_bf16_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v20, v[20:21], off
@@ -62,6 +101,19 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f16_16x16x32_f16_index_key(<8 x half> %A, <16 x half> %B, <8 x half> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_f16_16x16x32_f16_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v16, v[16:17], off
+; GFX1170-NEXT:    v_dual_mov_b32 v25, v15 :: v_dual_mov_b32 v24, v14
+; GFX1170-NEXT:    v_dual_mov_b32 v23, v13 :: v_dual_mov_b32 v22, v12
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_f16_16x16x32_f16 v[22:25], v[0:3], v[4:11], v16
+; GFX1170-NEXT:    v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 index_key:1
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[22:25], off
+; GFX1170-NEXT:    global_store_b128 v[20:21], v[12:15], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f16_16x16x32_f16_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v16, v[16:17], off
@@ -86,6 +138,19 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_bf16_16x16x32_bf16_index_key(<8 x i16> %A, <16 x i16> %B, <8 x i16> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_bf16_16x16x32_bf16_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v16, v[16:17], off
+; GFX1170-NEXT:    v_dual_mov_b32 v25, v15 :: v_dual_mov_b32 v24, v14
+; GFX1170-NEXT:    v_dual_mov_b32 v23, v13 :: v_dual_mov_b32 v22, v12
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_bf16_16x16x32_bf16 v[22:25], v[0:3], v[4:11], v16
+; GFX1170-NEXT:    v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 index_key:1
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[22:25], off
+; GFX1170-NEXT:    global_store_b128 v[20:21], v[12:15], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_bf16_16x16x32_bf16_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v16, v[16:17], off
@@ -110,6 +175,25 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8_index_key(<2 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_i32_16x16x32_iu8_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v14, v[14:15], off
+; GFX1170-NEXT:    v_dual_mov_b32 v27, v13 :: v_dual_mov_b32 v26, v12
+; GFX1170-NEXT:    v_dual_mov_b32 v25, v11 :: v_dual_mov_b32 v24, v10
+; GFX1170-NEXT:    v_dual_mov_b32 v23, v9 :: v_dual_mov_b32 v22, v8
+; GFX1170-NEXT:    v_dual_mov_b32 v21, v7 :: v_dual_mov_b32 v20, v6
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_i32_16x16x32_iu8 v[20:27], v[0:1], v[2:5], v14
+; GFX1170-NEXT:    v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 index_key:1
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[24:27], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[20:23], off
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[10:13], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[6:9], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_i32_16x16x32_iu8_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v14, v[14:15], off
@@ -140,6 +224,25 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4_index_key(i32 %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_i32_16x16x32_iu4_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v11, v[11:12], off
+; GFX1170-NEXT:    v_dual_mov_b32 v24, v10 :: v_dual_mov_b32 v23, v9
+; GFX1170-NEXT:    v_dual_mov_b32 v22, v8 :: v_dual_mov_b32 v21, v7
+; GFX1170-NEXT:    v_dual_mov_b32 v20, v6 :: v_dual_mov_b32 v19, v5
+; GFX1170-NEXT:    v_dual_mov_b32 v18, v4 :: v_dual_mov_b32 v17, v3
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_i32_16x16x32_iu4 v[17:24], v0, v[1:2], v11
+; GFX1170-NEXT:    v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 index_key:1
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[13:14], v[21:24], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[13:14], v[17:20], off
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[15:16], v[7:10], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[15:16], v[3:6], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_i32_16x16x32_iu4_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v11, v[11:12], off
@@ -170,6 +273,25 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_fp8_index_key(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_fp8_fp8_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v14, v[14:15], off
+; GFX1170-NEXT:    v_dual_mov_b32 v27, v13 :: v_dual_mov_b32 v26, v12
+; GFX1170-NEXT:    v_dual_mov_b32 v25, v11 :: v_dual_mov_b32 v24, v10
+; GFX1170-NEXT:    v_dual_mov_b32 v23, v9 :: v_dual_mov_b32 v22, v8
+; GFX1170-NEXT:    v_dual_mov_b32 v21, v7 :: v_dual_mov_b32 v20, v6
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_fp8 v[20:27], v[0:1], v[2:5], v14
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 index_key:1
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[24:27], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[20:23], off
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[10:13], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[6:9], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_fp8_fp8_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v14, v[14:15], off
@@ -200,6 +322,25 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_bf8_index_key(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_fp8_bf8_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v14, v[14:15], off
+; GFX1170-NEXT:    v_dual_mov_b32 v27, v13 :: v_dual_mov_b32 v26, v12
+; GFX1170-NEXT:    v_dual_mov_b32 v25, v11 :: v_dual_mov_b32 v24, v10
+; GFX1170-NEXT:    v_dual_mov_b32 v23, v9 :: v_dual_mov_b32 v22, v8
+; GFX1170-NEXT:    v_dual_mov_b32 v21, v7 :: v_dual_mov_b32 v20, v6
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_bf8 v[20:27], v[0:1], v[2:5], v14
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 index_key:1
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[24:27], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[20:23], off
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[10:13], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[6:9], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_fp8_bf8_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v14, v[14:15], off
@@ -230,6 +371,25 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_fp8_index_key(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_bf8_fp8_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v14, v[14:15], off
+; GFX1170-NEXT:    v_dual_mov_b32 v27, v13 :: v_dual_mov_b32 v26, v12
+; GFX1170-NEXT:    v_dual_mov_b32 v25, v11 :: v_dual_mov_b32 v24, v10
+; GFX1170-NEXT:    v_dual_mov_b32 v23, v9 :: v_dual_mov_b32 v22, v8
+; GFX1170-NEXT:    v_dual_mov_b32 v21, v7 :: v_dual_mov_b32 v20, v6
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_fp8 v[20:27], v[0:1], v[2:5], v14
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 index_key:1
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[24:27], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[20:23], off
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[10:13], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[6:9], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_bf8_fp8_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v14, v[14:15], off
@@ -260,6 +420,25 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_bf8_index_key(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_bf8_bf8_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v14, v[14:15], off
+; GFX1170-NEXT:    v_dual_mov_b32 v27, v13 :: v_dual_mov_b32 v26, v12
+; GFX1170-NEXT:    v_dual_mov_b32 v25, v11 :: v_dual_mov_b32 v24, v10
+; GFX1170-NEXT:    v_dual_mov_b32 v23, v9 :: v_dual_mov_b32 v22, v8
+; GFX1170-NEXT:    v_dual_mov_b32 v21, v7 :: v_dual_mov_b32 v20, v6
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_bf8 v[20:27], v[0:1], v[2:5], v14
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 index_key:1
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[24:27], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[20:23], off
+; GFX1170-NEXT:    s_clause 0x1
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[10:13], off offset:16
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[6:9], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_bf8_bf8_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v14, v[14:15], off
@@ -299,3 +478,5 @@ declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.i
 declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
 declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
 declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32.ll
index d67625248669a..0993c00c30415 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32.ll
@@ -1,14 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s --check-prefix=GFX12
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 < %s | FileCheck %s --check-prefixes=GCN,GFX1170
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s --check-prefixes=GCN,GFX12
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
-; GFX12-NEXT:    global_store_b128 v[16:17], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
+; GCN-NEXT:    global_store_b128 v[16:17], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half> %A, <8 x half> %B, <8 x float> %C)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -16,13 +17,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<8 x i16> %A, <8 x i16> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
-; GFX12-NEXT:    global_store_b128 v[16:17], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[16:17], v[12:15], off offset:16
+; GCN-NEXT:    global_store_b128 v[16:17], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16(<8 x i16> %A, <8 x i16> %B, <8 x float> %C)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -30,11 +31,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16(<8 x half> %A, <8 x half> %B, <8 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11]
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11]
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> %C, i1 0)
   store <8 x half> %res, ptr addrspace(1) %out
@@ -42,11 +43,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_bf16_16x16x16_bf16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11]
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_bf16_16x16x16_bf16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11]
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i1 0)
   store <8 x i16> %res, ptr addrspace(1) %out
@@ -54,13 +55,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -68,13 +69,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4(i32 %A, i32 %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[10:11], v[6:9], off offset:16
-; GFX12-NEXT:    global_store_b128 v[10:11], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[10:11], v[6:9], off offset:16
+; GCN-NEXT:    global_store_b128 v[10:11], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32(i1 0, i32 %A, i1 0, i32 %B, <8 x i32> %C, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -82,13 +83,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_fp8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> %C)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -96,13 +97,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_fp8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> %C)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -110,13 +111,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_bf8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> %C)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -124,13 +125,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_bf8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> %C)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -138,13 +139,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -153,13 +154,13 @@ bb:
 
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16(<8 x half> %A, <16 x half> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_f16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[21:22], v[16:19], off offset:16
-; GFX12-NEXT:    global_store_b128 v[21:22], v[12:15], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_f16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[21:22], v[16:19], off offset:16
+; GCN-NEXT:    global_store_b128 v[21:22], v[12:15], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.i16(<8 x half> %A, <16 x half> %B, <8 x float> %C, i16 %Index)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -167,13 +168,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf16(<8 x i16> %A, <16 x i16> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_bf16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[21:22], v[16:19], off offset:16
-; GFX12-NEXT:    global_store_b128 v[21:22], v[12:15], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_bf16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[21:22], v[16:19], off offset:16
+; GCN-NEXT:    global_store_b128 v[21:22], v[12:15], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v8f32.v8i16.v16i16.i16(<8 x i16> %A, <16 x i16> %B, <8 x float> %C, i16 %Index)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -181,11 +182,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f16_16x16x32_f16(<8 x half> %A, <16 x half> %B, <8 x half> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f16_16x16x32_f16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16
-; GFX12-NEXT:    global_store_b128 v[17:18], v[12:15], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f16_16x16x32_f16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16
+; GCN-NEXT:    global_store_b128 v[17:18], v[12:15], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.i16(<8 x half> %A, <16 x half> %B, <8 x half> %C, i16 %Index)
   store <8 x half> %res, ptr addrspace(1) %out
@@ -193,11 +194,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_bf16_16x16x32_bf16(<8 x i16> %A, <16 x i16> %B, <8 x i16> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_bf16_16x16x32_bf16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16
-; GFX12-NEXT:    global_store_b128 v[17:18], v[12:15], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_bf16_16x16x32_bf16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16
+; GCN-NEXT:    global_store_b128 v[17:18], v[12:15], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v8i16.v8i16.v16i16.i16(<8 x i16> %A, <16 x i16> %B, <8 x i16> %C, i16 %Index)
   store <8 x i16> %res, ptr addrspace(1) %out
@@ -205,13 +206,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8(<2 x i32> %A, <4 x i32> %B, <8 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.i16(i1 0, <2 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i16 %Index, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -219,13 +220,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4(i32 %A, <2 x i32> %B, <8 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu4:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[12:13], v[7:10], off offset:16
-; GFX12-NEXT:    global_store_b128 v[12:13], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu4:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[12:13], v[7:10], off offset:16
+; GCN-NEXT:    global_store_b128 v[12:13], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.i16(i1 0, i32 %A, i1 0, <2 x i32> %B, <8 x i32> %C, i16 %Index, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -233,13 +234,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x64_iu4(<2 x i32> %A, <4 x i32> %B, <8 x i32> %C, i32 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x64_iu4:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x64_iu4:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.i32(i1 0, <2 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i32 %Index, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -247,13 +248,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_fp8(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_fp8_fp8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_fp8_fp8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.i16(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -261,13 +262,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_bf8(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_fp8_bf8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_fp8_bf8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.i16(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -275,13 +276,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_fp8(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_bf8_fp8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_bf8_fp8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.i16(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -289,13 +290,13 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_bf8(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_bf8_bf8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[15:16], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_bf8_bf8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[15:16], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[15:16], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.i16(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -324,3 +325,6 @@ declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.i
 declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
 declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
 declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX1170: {{.*}}
+; GFX12: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
index 53bede84513c9..1a2d59e969590 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
@@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GFX12,GFX12-TRUE16
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GFX12,GFX12-FAKE16
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=+real-true16 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GCN,GFX1170-TRUE16
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=-real-true16 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GCN,GFX1170-FAKE16
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GCN,GFX12-TRUE16
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GCN,GFX12-FAKE16
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negA(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negA:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_negA:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[8:9], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.A = fneg <4 x half> %A
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16(<4 x half> %fneg.A, <4 x half> %B, <4 x float> %C)
@@ -16,11 +18,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negB(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negB:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_negB:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[8:9], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.B = fneg <4 x half> %B
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16(<4 x half> %A, <4 x half> %fneg.B, <4 x float> %C)
@@ -29,11 +31,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negC(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[8:9], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <4 x float> %C
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16(<4 x half> %A, <4 x half> %B, <4 x float> %fneg.C)
@@ -42,11 +44,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_absC(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[8:9], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <4 x float> @llvm.fabs.v4f32(<4 x float> %C)
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16(<4 x half> %A, <4 x half> %B, <4 x float> %fabs.C)
@@ -55,11 +57,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf16_negC(<4 x i16> %A, <4 x i16> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf16_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf16_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[8:9], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <4 x float> %C
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4i16(<4 x i16> %A, <4 x i16> %B, <4 x float> %fneg.C)
@@ -68,11 +70,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf16_absC(<4 x i16> %A, <4 x i16> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf16_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf16_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[8:9], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <4 x float> @llvm.fabs.v4f32(<4 x float> %C)
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4i16(<4 x i16> %A, <4 x i16> %B, <4 x float> %fabs.C)
@@ -81,11 +83,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negA(<4 x half> %A, <4 x half> %B, <4 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negA:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT:    global_store_b64 v[6:7], v[4:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_negA:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GCN-NEXT:    global_store_b64 v[6:7], v[4:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.A = fneg <4 x half> %A
   %res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half> %fneg.A, <4 x half> %B, <4 x half> %C, i1 0)
@@ -94,11 +96,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negB(<4 x half> %A, <4 x half> %B, <4 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negB:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT:    global_store_b64 v[6:7], v[4:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_negB:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GCN-NEXT:    global_store_b64 v[6:7], v[4:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.B = fneg <4 x half> %B
   %res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half> %A, <4 x half> %fneg.B, <4 x half> %C, i1 0)
@@ -107,11 +109,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC(<4 x half> %A, <4 x half> %B, <4 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,0,1]
-; GFX12-NEXT:    global_store_b64 v[6:7], v[4:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,0,1]
+; GCN-NEXT:    global_store_b64 v[6:7], v[4:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <4 x half> %C
   %res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half> %A, <4 x half> %B, <4 x half> %fneg.C, i1 0)
@@ -120,11 +122,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_absC(<4 x half> %A, <4 x half> %B, <4 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b64 v[6:7], v[4:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b64 v[6:7], v[4:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <4 x half> @llvm.fabs.v4f16(<4 x half> %C)
   %res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half> %A, <4 x half> %B, <4 x half> %fabs.C, i1 0)
@@ -133,11 +135,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8_negC(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_fp8_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <4 x float> %C
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32(i32 %A, i32 %B, <4 x float> %fneg.C)
@@ -146,11 +148,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8_absC(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_fp8_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <4 x float> @llvm.fabs.v4f32(<4 x float> %C)
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32(i32 %A, i32 %B, <4 x float> %fabs.C)
@@ -159,11 +161,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8_negC(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_fp8_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <4 x float> %C
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32(i32 %A, i32 %B, <4 x float> %fneg.C)
@@ -172,11 +174,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8_absC(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_fp8_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <4 x float> @llvm.fabs.v4f32(<4 x float> %C)
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32(i32 %A, i32 %B, <4 x float> %fabs.C)
@@ -185,11 +187,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8_negC(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_bf8_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <4 x float> %C
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32(i32 %A, i32 %B, <4 x float> %fneg.C)
@@ -198,11 +200,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8_absC(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_bf8_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <4 x float> @llvm.fabs.v4f32(<4 x float> %C)
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32(i32 %A, i32 %B, <4 x float> %fabs.C)
@@ -211,11 +213,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8_negC(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8_negC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_bf8_negC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.C = fneg <4 x float> %C
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32(i32 %A, i32 %B, <4 x float> %fneg.C)
@@ -224,11 +226,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8_absC(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8_absC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_bf8_absC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <4 x float> @llvm.fabs.v4f32(<4 x float> %C)
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32(i32 %A, i32 %B, <4 x float> %fabs.C)
@@ -237,11 +239,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_negA(<4 x half> %A, <8 x half> %B, <4 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_negA:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[11:12], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_f16_negA:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[11:12], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.A = fneg <4 x half> %A
   %res = call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.i16(<4 x half> %fneg.A, <8 x half> %B, <4 x float> %C, i16 %Index)
@@ -250,11 +252,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_negB(<4 x half> %A, <8 x half> %B, <4 x float> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_negB:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[11:12], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_f16_negB:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[11:12], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.B = fneg <8 x half> %B
   %res = call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.i16(<4 x half> %A, <8 x half> %fneg.B, <4 x float> %C, i16 %Index)
@@ -263,11 +265,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f16_16x16x32_f16_negA(<4 x half> %A, <8 x half> %B, <4 x half> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f16_16x16x32_f16_negA:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT:    global_store_b64 v[9:10], v[6:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f16_16x16x32_f16_negA:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GCN-NEXT:    global_store_b64 v[9:10], v[6:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.A = fneg <4 x half> %A
   %res = call <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.i16(<4 x half> %fneg.A, <8 x half> %B, <4 x half> %C, i16 %Index)
@@ -276,11 +278,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f16_16x16x32_f16_negB(<4 x half> %A, <8 x half> %B, <4 x half> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f16_16x16x32_f16_negB:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT:    global_store_b64 v[9:10], v[6:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f16_16x16x32_f16_negB:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GCN-NEXT:    global_store_b64 v[9:10], v[6:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.B = fneg <8 x half> %B
   %res = call <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.i16(<4 x half> %A, <8 x half> %fneg.B, <4 x half> %C, i16 %Index)
@@ -291,11 +293,11 @@ bb:
 ; both neg and abs patterns (wmma matrix C f32 or f16 )
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negabsC(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negabsC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_negabsC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[8:9], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <4 x float> @llvm.fabs.v4f32(<4 x float> %C)
   %fneg.fabs.C = fneg <4 x float> %fabs.C
@@ -305,11 +307,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negabsC(<4 x half> %A, <4 x half> %B, <4 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negabsC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX12-NEXT:    global_store_b64 v[6:7], v[4:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_negabsC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GCN-NEXT:    global_store_b64 v[6:7], v[4:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fabs.C = call <4 x half> @llvm.fabs.v4f16(<4 x half> %C)
   %fneg.fabs.C = fneg <4 x half> %fabs.C
@@ -319,13 +321,13 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_neg_partial_fabsA(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_neg_partial_fabsA:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_and_b32_e32 v7, 0x7fffffff, v7
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_neg_partial_fabsA:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_and_b32_e32 v7, 0x7fffffff, v7
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1]
+; GCN-NEXT:    global_store_b128 v[8:9], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %el3 = extractelement <4 x float> %C, i32 3
   %el3.fabs = call float @llvm.fabs.f32(float %el3)
@@ -339,11 +341,11 @@ bb:
 ; A or B matrix modifier and constant in C
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_negA_constantC(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_negA_constantC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[6:9], v[0:1], v[2:3], 1.0 neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_negA_constantC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[6:9], v[0:1], v[2:3], 1.0 neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.A = fneg <4 x half> %A
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16(<4 x half> %fneg.A, <4 x half> %B, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
@@ -352,11 +354,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negB_constantC(<4 x half> %A, <4 x half> %B, <4 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_negB_constantC:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[6:7], v[0:1], v[2:3], 1.0 neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX12-NEXT:    global_store_b64 v[4:5], v[6:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_negB_constantC:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[6:7], v[0:1], v[2:3], 1.0 neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GCN-NEXT:    global_store_b64 v[4:5], v[6:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %fneg.B = fneg <4 x half> %B
   %res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half> %A, <4 x half> %fneg.B, <4 x half> <half 1.0, half 1.0, half 1.0, half 1.0>, i1 0)
@@ -367,6 +369,29 @@ bb:
 ; pack f16 elements with v_perm_b32 since they don't come from same b32
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<4 x half> %A, <4 x half> %B, ptr %Caddr, ptr addrspace(1) %out) {
+; GFX1170-TRUE16-LABEL: test_wmma_f16_16x16x16_f16_negC_pack:
+; GFX1170-TRUE16:       ; %bb.0: ; %bb
+; GFX1170-TRUE16-NEXT:    flat_load_b128 v[8:11], v[4:5]
+; GFX1170-TRUE16-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX1170-TRUE16-NEXT:    v_mov_b16_e32 v10.h, v11.l
+; GFX1170-TRUE16-NEXT:    v_mov_b16_e32 v8.h, v9.l
+; GFX1170-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1170-TRUE16-NEXT:    v_mov_b32_e32 v9, v10
+; GFX1170-TRUE16-NEXT:    v_wmma_f16_16x16x16_f16 v[8:9], v[0:1], v[2:3], v[8:9] neg_lo:[0,0,1]
+; GFX1170-TRUE16-NEXT:    global_store_b64 v[6:7], v[8:9], off
+; GFX1170-TRUE16-NEXT:    s_endpgm
+;
+; GFX1170-FAKE16-LABEL: test_wmma_f16_16x16x16_f16_negC_pack:
+; GFX1170-FAKE16:       ; %bb.0: ; %bb
+; GFX1170-FAKE16-NEXT:    flat_load_b128 v[8:11], v[4:5]
+; GFX1170-FAKE16-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX1170-FAKE16-NEXT:    v_perm_b32 v5, v11, v10, 0x5040100
+; GFX1170-FAKE16-NEXT:    v_perm_b32 v4, v9, v8, 0x5040100
+; GFX1170-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-FAKE16-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,0,1]
+; GFX1170-FAKE16-NEXT:    global_store_b64 v[6:7], v[4:5], off
+; GFX1170-FAKE16-NEXT:    s_endpgm
+;
 ; GFX12-TRUE16-LABEL: test_wmma_f16_16x16x16_f16_negC_pack:
 ; GFX12-TRUE16:       ; %bb.0: ; %bb
 ; GFX12-TRUE16-NEXT:    flat_load_b128 v[8:11], v[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-imm.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-imm.ll
index a8f5726632aa1..a4222338a5038 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-imm.ll
@@ -1,12 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefix=GFX12
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GCN,GFX1170
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GCN,GFX12
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_imm(<4 x half> %A, <4 x half> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[6:9], v[0:1], v[2:3], 1.0
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[6:9], v[0:1], v[2:3], 1.0
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float>@llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16(<4 x half> %A, <4 x half> %B, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -14,16 +15,16 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_imm_non_inlineable(<4 x half> %A, <4 x half> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v6, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_mov_b32_e32 v7, v6
-; GFX12-NEXT:    v_mov_b32_e32 v8, v6
-; GFX12-NEXT:    v_mov_b32_e32 v9, v6
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[6:9], v[0:1], v[2:3], v[6:9]
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v6, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_mov_b32_e32 v7, v6
+; GCN-NEXT:    v_mov_b32_e32 v8, v6
+; GCN-NEXT:    v_mov_b32_e32 v9, v6
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[6:9], v[0:1], v[2:3], v[6:9]
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float>@llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16(<4 x half> %A, <4 x half> %B, <4 x float> <float 3.0, float 3.0, float 3.0, float 3.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -31,11 +32,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf16_imm(<4 x i16> %A, <4 x i16> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf16_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf16 v[6:9], v[0:1], v[2:3], 1.0
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf16_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf16 v[6:9], v[0:1], v[2:3], 1.0
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4i16(<4 x i16> %A, <4 x i16> %B, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -43,16 +44,16 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf16_imm_non_inlineable(<4 x i16> %A, <4 x i16> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf16_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v6, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_mov_b32_e32 v7, v6
-; GFX12-NEXT:    v_mov_b32_e32 v8, v6
-; GFX12-NEXT:    v_mov_b32_e32 v9, v6
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf16 v[6:9], v[0:1], v[2:3], v[6:9]
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf16_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v6, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_mov_b32_e32 v7, v6
+; GCN-NEXT:    v_mov_b32_e32 v8, v6
+; GCN-NEXT:    v_mov_b32_e32 v9, v6
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf16 v[6:9], v[0:1], v[2:3], v[6:9]
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4i16(<4 x i16> %A, <4 x i16> %B, <4 x float> <float 3.0, float 3.0, float 3.0, float 3.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -60,11 +61,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_imm(<4 x half> %A, <4 x half> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[6:7], v[0:1], v[2:3], 1.0
-; GFX12-NEXT:    global_store_b64 v[4:5], v[6:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[6:7], v[0:1], v[2:3], 1.0
+; GCN-NEXT:    global_store_b64 v[4:5], v[6:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v4f16.v4f16(<4 x half> %A, <4 x half> %B, <4 x half> <half 1.0, half 1.0, half 1.0, half 1.0>, i1 0)
   store <4 x half> %res, ptr addrspace(1) %out
@@ -72,14 +73,14 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_imm_non_inlineable(<4 x half> %A, <4 x half> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v6, 0x42004200
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_mov_b32_e32 v7, v6
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[6:7], v[0:1], v[2:3], v[6:7]
-; GFX12-NEXT:    global_store_b64 v[4:5], v[6:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v6, 0x42004200
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_mov_b32_e32 v7, v6
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[6:7], v[0:1], v[2:3], v[6:7]
+; GCN-NEXT:    global_store_b64 v[4:5], v[6:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v4f16.v4f16(<4 x half> %A, <4 x half> %B, <4 x half> <half 3.0, half 3.0, half 3.0, half 3.0>, i1 0)
   store <4 x half> %res, ptr addrspace(1) %out
@@ -87,14 +88,14 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_imm(<4 x i16> %A, <4 x i16> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_bf16_16x16x16_bf16_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v6, 0x3f803f80
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_mov_b32_e32 v7, v6
-; GFX12-NEXT:    v_wmma_bf16_16x16x16_bf16 v[6:7], v[0:1], v[2:3], v[6:7]
-; GFX12-NEXT:    global_store_b64 v[4:5], v[6:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_bf16_16x16x16_bf16_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v6, 0x3f803f80
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_mov_b32_e32 v7, v6
+; GCN-NEXT:    v_wmma_bf16_16x16x16_bf16 v[6:7], v[0:1], v[2:3], v[6:7]
+; GCN-NEXT:    global_store_b64 v[4:5], v[6:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v4i16.v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> <i16 16256, i16 16256, i16 16256, i16 16256>, i1 0)
   store <4 x i16> %res, ptr addrspace(1) %out
@@ -102,14 +103,14 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_imm_non_inlineable(<4 x i16> %A, <4 x i16> %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_bf16_16x16x16_bf16_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v6, 0x3fc03fc0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_mov_b32_e32 v7, v6
-; GFX12-NEXT:    v_wmma_bf16_16x16x16_bf16 v[6:7], v[0:1], v[2:3], v[6:7]
-; GFX12-NEXT:    global_store_b64 v[4:5], v[6:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_bf16_16x16x16_bf16_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v6, 0x3fc03fc0
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_mov_b32_e32 v7, v6
+; GCN-NEXT:    v_wmma_bf16_16x16x16_bf16 v[6:7], v[0:1], v[2:3], v[6:7]
+; GCN-NEXT:    global_store_b64 v[4:5], v[6:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v4i16.v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> <i16 16320, i16 16320, i16 16320, i16 16320>, i1 0)
   store <4 x i16> %res, ptr addrspace(1) %out
@@ -117,11 +118,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_imm(i32 %A, i32 %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:7], v0, v1, 1
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:7], v0, v1, 1
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -129,16 +130,16 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_imm_non_inlineable(i32 %A, i32 %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v4, 0x80
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_mov_b32_e32 v5, v4
-; GFX12-NEXT:    v_mov_b32_e32 v6, v4
-; GFX12-NEXT:    v_mov_b32_e32 v7, v4
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:7], v0, v1, v[4:7]
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v4, 0x80
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_mov_b32_e32 v5, v4
+; GCN-NEXT:    v_mov_b32_e32 v6, v4
+; GCN-NEXT:    v_mov_b32_e32 v7, v4
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[4:7], v0, v1, v[4:7]
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> <i32 128, i32 128, i32 128, i32 128>, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -146,11 +147,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4_imm(i32 %A, i32 %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[4:7], v0, v1, 1
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[4:7], v0, v1, 1
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -158,16 +159,16 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4_imm_non_inlineable(i32 %A, i32 %B, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v4, 0x80
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_mov_b32_e32 v5, v4
-; GFX12-NEXT:    v_mov_b32_e32 v6, v4
-; GFX12-NEXT:    v_mov_b32_e32 v7, v4
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[4:7], v0, v1, v[4:7]
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v4, 0x80
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_mov_b32_e32 v5, v4
+; GCN-NEXT:    v_mov_b32_e32 v6, v4
+; GCN-NEXT:    v_mov_b32_e32 v7, v4
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[4:7], v0, v1, v[4:7]
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> <i32 128, i32 128, i32 128, i32 128>, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -175,11 +176,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8_imm(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[4:7], v0, v1, 1.0
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_fp8_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[4:7], v0, v1, 1.0
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32(i32 %A, i32 %B, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -187,16 +188,16 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8_imm_non_inlineable(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v4, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_mov_b32_e32 v5, v4
-; GFX12-NEXT:    v_mov_b32_e32 v6, v4
-; GFX12-NEXT:    v_mov_b32_e32 v7, v4
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[4:7], v0, v1, v[4:7]
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_fp8_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v4, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_mov_b32_e32 v5, v4
+; GCN-NEXT:    v_mov_b32_e32 v6, v4
+; GCN-NEXT:    v_mov_b32_e32 v7, v4
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[4:7], v0, v1, v[4:7]
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32(i32 %A, i32 %B, <4 x float> <float 3.0, float 3.0, float 3.0, float 3.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -204,11 +205,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8_imm(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[4:7], v0, v1, 1.0
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_fp8_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[4:7], v0, v1, 1.0
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32(i32 %A, i32 %B, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -216,16 +217,16 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8_imm_non_inlineable(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v4, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_mov_b32_e32 v5, v4
-; GFX12-NEXT:    v_mov_b32_e32 v6, v4
-; GFX12-NEXT:    v_mov_b32_e32 v7, v4
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[4:7], v0, v1, v[4:7]
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_fp8_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v4, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_mov_b32_e32 v5, v4
+; GCN-NEXT:    v_mov_b32_e32 v6, v4
+; GCN-NEXT:    v_mov_b32_e32 v7, v4
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[4:7], v0, v1, v[4:7]
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32(i32 %A, i32 %B, <4 x float> <float 3.0, float 3.0, float 3.0, float 3.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -233,11 +234,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8_imm(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[4:7], v0, v1, 1.0
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_bf8_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[4:7], v0, v1, 1.0
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32(i32 %A, i32 %B, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -245,16 +246,16 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8_imm_non_inlineable(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v4, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_mov_b32_e32 v5, v4
-; GFX12-NEXT:    v_mov_b32_e32 v6, v4
-; GFX12-NEXT:    v_mov_b32_e32 v7, v4
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[4:7], v0, v1, v[4:7]
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_bf8_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v4, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_mov_b32_e32 v5, v4
+; GCN-NEXT:    v_mov_b32_e32 v6, v4
+; GCN-NEXT:    v_mov_b32_e32 v7, v4
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[4:7], v0, v1, v[4:7]
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32(i32 %A, i32 %B, <4 x float> <float 3.0, float 3.0, float 3.0, float 3.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -262,11 +263,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8_imm(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[4:7], v0, v1, 1.0
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_bf8_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[4:7], v0, v1, 1.0
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32(i32 %A, i32 %B, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -274,16 +275,16 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8_imm_non_inlineable(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v4, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_mov_b32_e32 v5, v4
-; GFX12-NEXT:    v_mov_b32_e32 v6, v4
-; GFX12-NEXT:    v_mov_b32_e32 v7, v4
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[4:7], v0, v1, v[4:7]
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_bf8_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v4, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_mov_b32_e32 v5, v4
+; GCN-NEXT:    v_mov_b32_e32 v6, v4
+; GCN-NEXT:    v_mov_b32_e32 v7, v4
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[4:7], v0, v1, v[4:7]
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32(i32 %A, i32 %B, <4 x float> <float 3.0, float 3.0, float 3.0, float 3.0>)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -291,11 +292,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4_imm(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:7], v0, v1, 1
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:7], v0, v1, 1
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -303,16 +304,16 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4_imm_non_inlineable(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v4, 0x80
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_mov_b32_e32 v5, v4
-; GFX12-NEXT:    v_mov_b32_e32 v6, v4
-; GFX12-NEXT:    v_mov_b32_e32 v7, v4
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:7], v0, v1, v[4:7]
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v4, 0x80
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_mov_b32_e32 v5, v4
+; GCN-NEXT:    v_mov_b32_e32 v6, v4
+; GCN-NEXT:    v_mov_b32_e32 v7, v4
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[4:7], v0, v1, v[4:7]
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> <i32 128, i32 128, i32 128, i32 128>, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -330,3 +331,6 @@ declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32(i32, i32, <
 declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32(i32, i32, <4 x float>)
 declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32(i32, i32, <4 x float>)
 declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 immarg)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX1170: {{.*}}
+; GFX12: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-iu-modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-iu-modifiers.ll
index 9303dbfad437f..baeb81ab62957 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-iu-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-iu-modifiers.ll
@@ -1,12 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefix=GFX12
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GCN,GFX1170
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GCN,GFX12
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_zext_src0(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32(i1 1, i32 %A, i1 0, i32 %B, <4 x i32> %C, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -14,11 +15,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_zext_src1(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32(i1 0, i32 %A, i1 1, i32 %B, <4 x i32> %C, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -26,11 +27,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_clamp(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] clamp
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] clamp
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> %C, i1 1)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -40,11 +41,11 @@ bb:
 
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4_zext_src0(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32(i1 1, i32 %A, i1 0, i32 %B, <4 x i32> %C, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -52,11 +53,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4_zext_src1(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32(i1 0, i32 %A, i1 1, i32 %B, <4 x i32> %C, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -64,11 +65,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4_clamp(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] clamp
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] clamp
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> %C, i1 1)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -78,11 +79,11 @@ bb:
 
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4_zext_src0(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 1, i32 %A, i1 0, i32 %B, <4 x i32> %C, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -90,11 +91,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4_zext_src1(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 0, i32 %A, i1 1, i32 %B, <4 x i32> %C, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -102,11 +103,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4_clamp(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] clamp
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] clamp
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> %C, i1 1)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -114,11 +115,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8_zext_src0(i32 %A, <2 x i32> %B, <4 x i32> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu8_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 neg_lo:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu8_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 neg_lo:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.i8(i1 1, i32 %A, i1 0, <2 x i32> %B, <4 x i32> %C, i8 %Index, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -126,11 +127,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8_zext_src1(i32 %A, <2 x i32> %B, <4 x i32> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu8_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 neg_lo:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu8_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 neg_lo:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.i8(i1 0, i32 %A, i1 1, <2 x i32> %B, <4 x i32> %C, i8 %Index, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -138,11 +139,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8_clamp(i32 %A, <2 x i32> %B, <4 x i32> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu8_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 clamp
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu8_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 clamp
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.i8(i1 0, i32 %A, i1 0, <2 x i32> %B, <4 x i32> %C, i8 %Index, i1 1)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -152,11 +153,11 @@ bb:
 
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4_zext_src0(i32 %A, i32 %B, <4 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu4_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 neg_lo:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[7:8], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu4_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 neg_lo:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[7:8], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.i16(i1 1, i32 %A, i1 0, i32 %B, <4 x i32> %C, i16 %Index, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -164,11 +165,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4_zext_src1(i32 %A, i32 %B, <4 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu4_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 neg_lo:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[7:8], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu4_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 neg_lo:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[7:8], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.i16(i1 0, i32 %A, i1 1, i32 %B, <4 x i32> %C, i16 %Index, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -176,11 +177,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4_clamp(i32 %A, i32 %B, <4 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu4_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 clamp
-; GFX12-NEXT:    global_store_b128 v[7:8], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu4_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 clamp
+; GCN-NEXT:    global_store_b128 v[7:8], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.i16(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> %C, i16 %Index, i1 1)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -190,11 +191,11 @@ bb:
 
 
 define amdgpu_ps void @test_swmmac_i32_16x16x64_iu4_zext_src0(i32 %A, <2 x i32> %B, <4 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x64_iu4_zext_src0:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 neg_lo:[1,0,0]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x64_iu4_zext_src0:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 neg_lo:[1,0,0]
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.i16(i1 1, i32 %A, i1 0, <2 x i32> %B, <4 x i32> %C, i16 %Index, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -202,11 +203,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x64_iu4_zext_src1(i32 %A, <2 x i32> %B, <4 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x64_iu4_zext_src1:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 neg_lo:[0,1,0]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x64_iu4_zext_src1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 neg_lo:[0,1,0]
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.i16(i1 0, i32 %A, i1 1, <2 x i32> %B, <4 x i32> %C, i16 %Index, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -214,11 +215,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x64_iu4_clamp(i32 %A, <2 x i32> %B, <4 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x64_iu4_clamp:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 clamp
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x64_iu4_clamp:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 clamp
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.i16(i1 0, i32 %A, i1 0, <2 x i32> %B, <4 x i32> %C, i16 %Index, i1 1)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -231,3 +232,6 @@ declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 immarg, i32, i
 declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.i8(i1 immarg, i32, i1 immarg, <2 x i32>, <4 x i32>, i8 %Index, i1 immarg)
 declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.i16(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i16 %Index, i1 immarg)
 declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.i16(i1 immarg, i32, i1 immarg, <2 x i32>, <4 x i32>, i16 %Index, i1 immarg)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX1170: {{.*}}
+; GFX12: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-swmmac-index_key.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-swmmac-index_key.ll
index fdfec74e01b7b..183230a1242bf 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-swmmac-index_key.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-swmmac-index_key.ll
@@ -1,7 +1,35 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefix=GFX12
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GCN,GFX1170
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GCN,GFX12
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16_index_key(<4 x half> %A, <8 x half> %B, <4 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_f16_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v10, v[10:11], off
+; GFX1170-NEXT:    v_mov_b32_e32 v23, v9
+; GFX1170-NEXT:    v_mov_b32_e32 v22, v8
+; GFX1170-NEXT:    v_mov_b32_e32 v21, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v20, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v27, v9
+; GFX1170-NEXT:    v_mov_b32_e32 v26, v8
+; GFX1170-NEXT:    v_mov_b32_e32 v25, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v24, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v31, v9
+; GFX1170-NEXT:    v_mov_b32_e32 v30, v8
+; GFX1170-NEXT:    v_mov_b32_e32 v29, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v28, v6
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_f16 v[20:23], v[0:1], v[2:5], v10
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_f16 v[24:27], v[0:1], v[2:5], v10 index_key:1
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_f16 v[28:31], v[0:1], v[2:5], v10 index_key:2
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 index_key:3
+; GFX1170-NEXT:    global_store_b128 v[12:13], v[20:23], off
+; GFX1170-NEXT:    global_store_b128 v[14:15], v[24:27], off
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[28:31], off
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[6:9], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_f16_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v10, v[10:11], off
@@ -46,6 +74,33 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf16_index_key(<4 x i16> %A, <8 x i16> %B, <4 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_bf16_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v10, v[10:11], off
+; GFX1170-NEXT:    v_mov_b32_e32 v23, v9
+; GFX1170-NEXT:    v_mov_b32_e32 v22, v8
+; GFX1170-NEXT:    v_mov_b32_e32 v21, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v20, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v27, v9
+; GFX1170-NEXT:    v_mov_b32_e32 v26, v8
+; GFX1170-NEXT:    v_mov_b32_e32 v25, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v24, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v31, v9
+; GFX1170-NEXT:    v_mov_b32_e32 v30, v8
+; GFX1170-NEXT:    v_mov_b32_e32 v29, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v28, v6
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf16 v[20:23], v[0:1], v[2:5], v10
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf16 v[24:27], v[0:1], v[2:5], v10 index_key:1
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf16 v[28:31], v[0:1], v[2:5], v10 index_key:2
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 index_key:3
+; GFX1170-NEXT:    global_store_b128 v[12:13], v[20:23], off
+; GFX1170-NEXT:    global_store_b128 v[14:15], v[24:27], off
+; GFX1170-NEXT:    global_store_b128 v[16:17], v[28:31], off
+; GFX1170-NEXT:    global_store_b128 v[18:19], v[6:9], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_bf16_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v10, v[10:11], off
@@ -90,6 +145,27 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f16_16x16x32_f16_index_key(<4 x half> %A, <8 x half> %B, <4 x half> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) {
+; GFX1170-LABEL: test_swmmac_f16_16x16x32_f16_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v22, v[8:9], off
+; GFX1170-NEXT:    v_mov_b32_e32 v9, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v8, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v19, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v18, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v21, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v20, v6
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    v_swmmac_f16_16x16x32_f16 v[8:9], v[0:1], v[2:5], v22
+; GFX1170-NEXT:    v_swmmac_f16_16x16x32_f16 v[18:19], v[0:1], v[2:5], v22 index_key:1
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1170-NEXT:    v_swmmac_f16_16x16x32_f16 v[20:21], v[0:1], v[2:5], v22 index_key:2
+; GFX1170-NEXT:    v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v22 index_key:3
+; GFX1170-NEXT:    global_store_b64 v[10:11], v[8:9], off
+; GFX1170-NEXT:    global_store_b64 v[12:13], v[18:19], off
+; GFX1170-NEXT:    global_store_b64 v[14:15], v[20:21], off
+; GFX1170-NEXT:    global_store_b64 v[16:17], v[6:7], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f16_16x16x32_f16_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v22, v[8:9], off
@@ -128,6 +204,27 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_bf16_16x16x32_bf16_index_key(<4 x i16> %A, <8 x i16> %B, <4 x i16> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) {
+; GFX1170-LABEL: test_swmmac_bf16_16x16x32_bf16_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v22, v[8:9], off
+; GFX1170-NEXT:    v_mov_b32_e32 v9, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v8, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v19, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v18, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v21, v7
+; GFX1170-NEXT:    v_mov_b32_e32 v20, v6
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    v_swmmac_bf16_16x16x32_bf16 v[8:9], v[0:1], v[2:5], v22
+; GFX1170-NEXT:    v_swmmac_bf16_16x16x32_bf16 v[18:19], v[0:1], v[2:5], v22 index_key:1
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1170-NEXT:    v_swmmac_bf16_16x16x32_bf16 v[20:21], v[0:1], v[2:5], v22 index_key:2
+; GFX1170-NEXT:    v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v22 index_key:3
+; GFX1170-NEXT:    global_store_b64 v[10:11], v[8:9], off
+; GFX1170-NEXT:    global_store_b64 v[12:13], v[18:19], off
+; GFX1170-NEXT:    global_store_b64 v[14:15], v[20:21], off
+; GFX1170-NEXT:    global_store_b64 v[16:17], v[6:7], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_bf16_16x16x32_bf16_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v22, v[8:9], off
@@ -166,6 +263,33 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8_index_key(i32 %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) {
+; GFX1170-LABEL: test_swmmac_i32_16x16x32_iu8_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v7, v[7:8], off
+; GFX1170-NEXT:    v_mov_b32_e32 v20, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v19, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v18, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v17, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v24, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v23, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v22, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v21, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v28, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v27, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v26, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v25, v3
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    v_swmmac_i32_16x16x32_iu8 v[17:20], v0, v[1:2], v7
+; GFX1170-NEXT:    v_swmmac_i32_16x16x32_iu8 v[21:24], v0, v[1:2], v7 index_key:1
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1170-NEXT:    v_swmmac_i32_16x16x32_iu8 v[25:28], v0, v[1:2], v7 index_key:2
+; GFX1170-NEXT:    v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 index_key:3
+; GFX1170-NEXT:    global_store_b128 v[9:10], v[17:20], off
+; GFX1170-NEXT:    global_store_b128 v[11:12], v[21:24], off
+; GFX1170-NEXT:    global_store_b128 v[13:14], v[25:28], off
+; GFX1170-NEXT:    global_store_b128 v[15:16], v[3:6], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_i32_16x16x32_iu8_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v7, v[7:8], off
@@ -210,6 +334,21 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4_index_key(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_i32_16x16x32_iu4_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v6, v[6:7], off
+; GFX1170-NEXT:    v_mov_b32_e32 v15, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v14, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v13, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v12, v2
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_i32_16x16x32_iu4 v[12:15], v0, v1, v6
+; GFX1170-NEXT:    v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 index_key:1
+; GFX1170-NEXT:    global_store_b128 v[8:9], v[12:15], off
+; GFX1170-NEXT:    global_store_b128 v[10:11], v[2:5], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_i32_16x16x32_iu4_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v6, v[6:7], off
@@ -236,6 +375,21 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x64_iu4_index_key(i32 %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
+; GFX1170-LABEL: test_swmmac_i32_16x16x64_iu4_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v7, v[7:8], off
+; GFX1170-NEXT:    v_mov_b32_e32 v16, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v15, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v14, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v13, v3
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1170-NEXT:    v_swmmac_i32_16x16x64_iu4 v[13:16], v0, v[1:2], v7
+; GFX1170-NEXT:    v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 index_key:1
+; GFX1170-NEXT:    global_store_b128 v[9:10], v[13:16], off
+; GFX1170-NEXT:    global_store_b128 v[11:12], v[3:6], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_i32_16x16x64_iu4_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v7, v[7:8], off
@@ -262,6 +416,33 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_fp8_index_key(i32 %A, <2 x i32> %B, <4 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_fp8_fp8_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v7, v[7:8], off
+; GFX1170-NEXT:    v_mov_b32_e32 v20, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v19, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v18, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v17, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v24, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v23, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v22, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v21, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v28, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v27, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v26, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v25, v3
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_fp8 v[17:20], v0, v[1:2], v7
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_fp8 v[21:24], v0, v[1:2], v7 index_key:1
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_fp8 v[25:28], v0, v[1:2], v7 index_key:2
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 index_key:3
+; GFX1170-NEXT:    global_store_b128 v[9:10], v[17:20], off
+; GFX1170-NEXT:    global_store_b128 v[11:12], v[21:24], off
+; GFX1170-NEXT:    global_store_b128 v[13:14], v[25:28], off
+; GFX1170-NEXT:    global_store_b128 v[15:16], v[3:6], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_fp8_fp8_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v7, v[7:8], off
@@ -306,6 +487,33 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_bf8_index_key(i32 %A, <2 x i32> %B, <4 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_fp8_bf8_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v7, v[7:8], off
+; GFX1170-NEXT:    v_mov_b32_e32 v20, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v19, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v18, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v17, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v24, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v23, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v22, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v21, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v28, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v27, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v26, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v25, v3
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_bf8 v[17:20], v0, v[1:2], v7
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_bf8 v[21:24], v0, v[1:2], v7 index_key:1
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_bf8 v[25:28], v0, v[1:2], v7 index_key:2
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 index_key:3
+; GFX1170-NEXT:    global_store_b128 v[9:10], v[17:20], off
+; GFX1170-NEXT:    global_store_b128 v[11:12], v[21:24], off
+; GFX1170-NEXT:    global_store_b128 v[13:14], v[25:28], off
+; GFX1170-NEXT:    global_store_b128 v[15:16], v[3:6], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_fp8_bf8_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v7, v[7:8], off
@@ -350,6 +558,33 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_fp8_index_key(i32 %A, <2 x i32> %B, <4 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_bf8_fp8_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v7, v[7:8], off
+; GFX1170-NEXT:    v_mov_b32_e32 v20, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v19, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v18, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v17, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v24, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v23, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v22, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v21, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v28, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v27, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v26, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v25, v3
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_fp8 v[17:20], v0, v[1:2], v7
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_fp8 v[21:24], v0, v[1:2], v7 index_key:1
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_fp8 v[25:28], v0, v[1:2], v7 index_key:2
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 index_key:3
+; GFX1170-NEXT:    global_store_b128 v[9:10], v[17:20], off
+; GFX1170-NEXT:    global_store_b128 v[11:12], v[21:24], off
+; GFX1170-NEXT:    global_store_b128 v[13:14], v[25:28], off
+; GFX1170-NEXT:    global_store_b128 v[15:16], v[3:6], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_bf8_fp8_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v7, v[7:8], off
@@ -394,6 +629,33 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_bf8_index_key(i32 %A, <2 x i32> %B, <4 x float> %C, ptr addrspace(1) %IndexVecPtr, ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) {
+; GFX1170-LABEL: test_swmmac_f32_16x16x32_bf8_bf8_index_key:
+; GFX1170:       ; %bb.0: ; %bb
+; GFX1170-NEXT:    global_load_b32 v7, v[7:8], off
+; GFX1170-NEXT:    v_mov_b32_e32 v20, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v19, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v18, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v17, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v24, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v23, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v22, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v21, v3
+; GFX1170-NEXT:    v_mov_b32_e32 v28, v6
+; GFX1170-NEXT:    v_mov_b32_e32 v27, v5
+; GFX1170-NEXT:    v_mov_b32_e32 v26, v4
+; GFX1170-NEXT:    v_mov_b32_e32 v25, v3
+; GFX1170-NEXT:    s_waitcnt vmcnt(0)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_bf8 v[17:20], v0, v[1:2], v7
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_bf8 v[21:24], v0, v[1:2], v7 index_key:1
+; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_bf8 v[25:28], v0, v[1:2], v7 index_key:2
+; GFX1170-NEXT:    v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 index_key:3
+; GFX1170-NEXT:    global_store_b128 v[9:10], v[17:20], off
+; GFX1170-NEXT:    global_store_b128 v[11:12], v[21:24], off
+; GFX1170-NEXT:    global_store_b128 v[13:14], v[25:28], off
+; GFX1170-NEXT:    global_store_b128 v[15:16], v[3:6], off
+; GFX1170-NEXT:    s_endpgm
+;
 ; GFX12-LABEL: test_swmmac_f32_16x16x32_bf8_bf8_index_key:
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    global_load_b32 v7, v[7:8], off
@@ -448,3 +710,5 @@ declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v4f32.i32.v2i32.i8(
 declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v4f32.i32.v2i32.i8(i32, <2 x i32>, <4 x float>, i8)
 declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v4f32.i32.v2i32.i8(i32, <2 x i32>, <4 x float>, i8)
 declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v4f32.i32.v2i32.i8(i32, <2 x i32>, <4 x float>, i8)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64.ll
index 896efb06d5595..60dc7cc766f75 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64.ll
@@ -1,12 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefix=GFX12
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GCN,GFX1170
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck %s --check-prefixes=GCN,GFX12
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<4 x half> %A, <4 x half> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7]
+; GCN-NEXT:    global_store_b128 v[8:9], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float>@llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16(<4 x half> %A, <4 x half> %B, <4 x float> %C)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -14,11 +15,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<4 x i16> %A, <4 x i16> %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7]
+; GCN-NEXT:    global_store_b128 v[8:9], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4i16(<4 x i16> %A, <4 x i16> %B, <4 x float> %C)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -26,11 +27,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16(<4 x half> %A, <4 x half> %B, <4 x half> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5]
-; GFX12-NEXT:    global_store_b64 v[6:7], v[4:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5]
+; GCN-NEXT:    global_store_b64 v[6:7], v[4:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v4f16.v4f16(<4 x half> %A, <4 x half> %B, <4 x half> %C, i1 0)
   store <4 x half> %res, ptr addrspace(1) %out
@@ -38,11 +39,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_bf16_16x16x16_bf16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5]
-; GFX12-NEXT:    global_store_b64 v[6:7], v[4:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_bf16_16x16x16_bf16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5]
+; GCN-NEXT:    global_store_b64 v[6:7], v[4:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v4i16.v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i1 0)
   store <4 x i16> %res, ptr addrspace(1) %out
@@ -50,11 +51,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> %C, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -62,11 +63,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> %C, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -74,11 +75,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_fp8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32(i32 %A, i32 %B, <4 x float> %C)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -86,11 +87,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_fp8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32(i32 %A, i32 %B, <4 x float> %C)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -98,11 +99,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_bf8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32(i32 %A, i32 %B, <4 x float> %C)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -110,11 +111,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8(i32 %A, i32 %B, <4 x float> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_bf8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32(i32 %A, i32 %B, <4 x float> %C)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -122,11 +123,11 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4(i32 %A, i32 %B, <4 x i32> %C, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5]
-; GFX12-NEXT:    global_store_b128 v[6:7], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5]
+; GCN-NEXT:    global_store_b128 v[6:7], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> %C, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -134,11 +135,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_f16(<4 x half> %A, <8 x half> %B, <4 x float> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_f16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10
-; GFX12-NEXT:    global_store_b128 v[11:12], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_f16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10
+; GCN-NEXT:    global_store_b128 v[11:12], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.i8(<4 x half> %A, <8 x half> %B, <4 x float> %C, i8 %Index)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -146,11 +147,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf16(<4 x i16> %A, <8 x i16> %B, <4 x float> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_bf16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10
-; GFX12-NEXT:    global_store_b128 v[11:12], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_bf16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10
+; GCN-NEXT:    global_store_b128 v[11:12], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v4f32.v4i16.v8i16.i8(<4 x i16> %A, <8 x i16> %B, <4 x float> %C, i8 %Index)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -158,11 +159,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f16_16x16x32_f16(<4 x half> %A, <8 x half> %B, <4 x half> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f16_16x16x32_f16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8
-; GFX12-NEXT:    global_store_b64 v[9:10], v[6:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f16_16x16x32_f16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8
+; GCN-NEXT:    global_store_b64 v[9:10], v[6:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.i8(<4 x half> %A, <8 x half> %B, <4 x half> %C, i8 %Index)
   store <4 x half> %res, ptr addrspace(1) %out
@@ -170,11 +171,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_bf16_16x16x32_bf16(<4 x i16> %A, <8 x i16> %B, <4 x i16> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_bf16_16x16x32_bf16:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8
-; GFX12-NEXT:    global_store_b64 v[9:10], v[6:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_bf16_16x16x32_bf16:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8
+; GCN-NEXT:    global_store_b64 v[9:10], v[6:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v4i16.v4i16.v8i16.i8(<4 x i16> %A, <8 x i16> %B, <4 x i16> %C, i8 %Index)
   store <4 x i16> %res, ptr addrspace(1) %out
@@ -182,11 +183,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8(i32 %A, <2 x i32> %B, <4 x i32> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.i8(i1 0, i32 %A, i1 0, <2 x i32> %B, <4 x i32> %C, i8 %Index, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -194,11 +195,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4(i32 %A, i32 %B, <4 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x32_iu4:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6
-; GFX12-NEXT:    global_store_b128 v[7:8], v[2:5], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x32_iu4:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6
+; GCN-NEXT:    global_store_b128 v[7:8], v[2:5], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.i16(i1 0, i32 %A, i1 0, i32 %B, <4 x i32> %C, i16 %Index, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -206,11 +207,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_i32_16x16x64_iu4(i32 %A, <2 x i32> %B, <4 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_i32_16x16x64_iu4:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_i32_16x16x64_iu4:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.i16(i1 0, i32 %A, i1 0, <2 x i32> %B, <4 x i32> %C, i16 %Index, i1 0)
   store <4 x i32> %res, ptr addrspace(1) %out
@@ -218,11 +219,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_fp8(i32 %A, <2 x i32> %B, <4 x float> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_fp8_fp8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_fp8_fp8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v4f32.i32.v2i32.i8(i32 %A, <2 x i32> %B, <4 x float> %C, i8 %Index)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -230,11 +231,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_bf8(i32 %A, <2 x i32> %B, <4 x float> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_fp8_bf8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_fp8_bf8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v4f32.i32.v2i32.i8(i32 %A, <2 x i32> %B, <4 x float> %C, i8 %Index)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -242,11 +243,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_fp8(i32 %A, <2 x i32> %B, <4 x float> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_bf8_fp8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_bf8_fp8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v4f32.i32.v2i32.i8(i32 %A, <2 x i32> %B, <4 x float> %C, i8 %Index)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -254,11 +255,11 @@ bb:
 }
 
 define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_bf8(i32 %A, <2 x i32> %B, <4 x float> %C, i8 %Index, ptr addrspace(1) %out) {
-; GFX12-LABEL: test_swmmac_f32_16x16x32_bf8_bf8:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7
-; GFX12-NEXT:    global_store_b128 v[8:9], v[3:6], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_swmmac_f32_16x16x32_bf8_bf8:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7
+; GCN-NEXT:    global_store_b128 v[8:9], v[3:6], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v4f32.i32.v2i32.i8(i32 %A, <2 x i32> %B, <4 x float> %C, i8 %Index)
   store <4 x float> %res, ptr addrspace(1) %out
@@ -287,3 +288,6 @@ declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v4f32.i32.v2i32.i8(
 declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v4f32.i32.v2i32.i8(i32, <2 x i32>, <4 x float>, i8)
 declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v4f32.i32.v2i32.i8(i32, <2 x i32>, <4 x float>, i8)
 declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v4f32.i32.v2i32.i8(i32, <2 x i32>, <4 x float>, i8)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX1170: {{.*}}
+; GFX12: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w32.mir b/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w32.mir
index ef85de2012943..897bd2d8517a4 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w32.mir
+++ b/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w32.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GFX12 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX1170 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX12 %s
 
 # D0 overlaps A1, B1, C1 or Index1. Overlap starts at vgpr0.
 #  $D0 = wmma0 $A0, $B0, $C0 or $D0 = swmmac0 $A0, $B0, $C0, $Index0
@@ -11,12 +12,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45
 
-    ; GFX12-LABEL: name: test_wmma_f32_16x16x16_f16_D0_overlaps_A1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35, 8, killed $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_f32_16x16x16_f16_D0_overlaps_A1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35, 8, killed $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
     early-clobber renamable $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35, 8, killed $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43, 0, 0, implicit $exec
 ...
@@ -27,12 +28,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45
 
-    ; GFX12-LABEL: name: test_wmma_f32_16x16x16_bf16_D0_overlaps_B1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43 = V_WMMA_F32_16X16X16_BF16_w32_twoaddr 8, killed $vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 8, killed $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_f32_16x16x16_bf16_D0_overlaps_B1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43 = V_WMMA_F32_16X16X16_BF16_w32_twoaddr 8, killed $vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 8, killed $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
     early-clobber renamable $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43 = V_WMMA_F32_16X16X16_BF16_w32_twoaddr 8, killed $vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 8, killed $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43, 0, 0, implicit $exec
 ...
@@ -43,11 +44,11 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
 
-    ; GFX12-LABEL: name: test_wmma_f16_16x16x16_f16_D0_overlaps_C1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F16_16X16X16_F16_w32_twoaddr 8, killed $vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_f16_16x16x16_f16_D0_overlaps_C1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F16_16X16X16_F16_w32_twoaddr 8, killed $vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F16_16X16X16_F16_w32_twoaddr 8, killed $vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
 ...
@@ -58,12 +59,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
 
-    ; GFX12-LABEL: name: test_wmma_bf16_16x16x16_bf16_D0_overlaps_A1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_BF16_16X16X16_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35, 8, killed $vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_bf16_16x16x16_bf16_D0_overlaps_A1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_BF16_16X16X16_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35, 8, killed $vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
     early-clobber renamable $vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_BF16_16X16X16_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35, 8, killed $vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, implicit $exec
 ...
@@ -73,12 +74,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
 
-    ; GFX12-LABEL: name: test_wmma_i32_16x16x16_iu8_D0_overlaps_B1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_I32_16X16X16_IU8_w32_twoaddr 8, killed $vgpr28_vgpr29, 8, killed $vgpr0_vgpr1, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_i32_16x16x16_iu8_D0_overlaps_B1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_I32_16X16X16_IU8_w32_twoaddr 8, killed $vgpr28_vgpr29, 8, killed $vgpr0_vgpr1, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
     early-clobber renamable $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_I32_16X16X16_IU8_w32_twoaddr 8, killed $vgpr28_vgpr29, 8, killed $vgpr0_vgpr1, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, implicit $exec
 ...
@@ -89,11 +90,11 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39
 
-    ; GFX12-LABEL: name: test_wmma_i32_16x16x16_iu4_D0_overlaps_C1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_I32_16X16X16_IU4_w32_twoaddr 8, killed $vgpr28, 8, killed $vgpr29, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_i32_16x16x16_iu4_D0_overlaps_C1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_I32_16X16X16_IU4_w32_twoaddr 8, killed $vgpr28, 8, killed $vgpr29, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_I32_16X16X16_IU4_w32_twoaddr 8, killed $vgpr28, 8, killed $vgpr29, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
 ...
@@ -104,12 +105,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
 
-    ; GFX12-LABEL: name: test_wmma_f32_16x16x16_fp8_fp8_D0_overlaps_A1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_F32_16X16X16_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1, killed $vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_f32_16x16x16_fp8_fp8_D0_overlaps_A1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_F32_16X16X16_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1, killed $vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
     early-clobber renamable $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_F32_16X16X16_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1, killed $vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, implicit $exec
 ...
@@ -120,12 +121,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
 
-    ; GFX12-LABEL: name: test_wmma_f32_16x16x16_bf8_fp8_D0_overlaps_B1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_F32_16X16X16_BF8_FP8_w32_twoaddr killed $vgpr28_vgpr29, killed $vgpr0_vgpr1, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_f32_16x16x16_bf8_fp8_D0_overlaps_B1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_F32_16X16X16_BF8_FP8_w32_twoaddr killed $vgpr28_vgpr29, killed $vgpr0_vgpr1, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
     early-clobber renamable $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_F32_16X16X16_BF8_FP8_w32_twoaddr killed $vgpr28_vgpr29, killed $vgpr0_vgpr1, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, implicit $exec
 ...
@@ -136,11 +137,11 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
 
-    ; GFX12-LABEL: name: test_wmma_f32_16x16x16_fp8_bf8_D0_overlaps_C1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_FP8_BF8_w32_twoaddr killed $vgpr28_vgpr29, killed $vgpr30_vgpr31, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_f32_16x16x16_fp8_bf8_D0_overlaps_C1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_FP8_BF8_w32_twoaddr killed $vgpr28_vgpr29, killed $vgpr30_vgpr31, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_FP8_BF8_w32_twoaddr killed $vgpr28_vgpr29, killed $vgpr30_vgpr31, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
 ...
@@ -151,12 +152,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
 
-    ; GFX12-LABEL: name: test_wmma_f32_16x16x16_bf8_bf8_D0_overlaps_A1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_F32_16X16X16_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1, killed $vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_f32_16x16x16_bf8_bf8_D0_overlaps_A1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_F32_16X16X16_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1, killed $vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
     early-clobber renamable $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_F32_16X16X16_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1, killed $vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, implicit $exec
 ...
@@ -167,12 +168,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
 
-    ; GFX12-LABEL: name: test_wmma_i32_16x16x32_iu4_D0_overlaps_B1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_I32_16X16X32_IU4_w32_twoaddr 8, killed $vgpr28_vgpr29, 8, killed $vgpr0_vgpr1, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_i32_16x16x32_iu4_D0_overlaps_B1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_I32_16X16X32_IU4_w32_twoaddr 8, killed $vgpr28_vgpr29, 8, killed $vgpr0_vgpr1, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
     early-clobber renamable $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_I32_16X16X32_IU4_w32_twoaddr 8, killed $vgpr28_vgpr29, 8, killed $vgpr0_vgpr1, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, implicit $exec
 ...
@@ -183,11 +184,11 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr48, $vgpr49, $vgpr50
 
-    ; GFX12-LABEL: name: test_swmmac_f32_16x16x32_f16_D0_overlaps_C1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr48, $vgpr49, $vgpr50
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_SWMMAC_F32_16X16X32_F16_w32_twoaddr 8, killed $vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr44, 0, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_swmmac_f32_16x16x32_f16_D0_overlaps_C1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr48, $vgpr49, $vgpr50
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_SWMMAC_F32_16X16X32_F16_w32_twoaddr 8, killed $vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr44, 0, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_SWMMAC_F32_16X16X32_F16_w32_twoaddr 8, killed $vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr44, 0, 0, 0, implicit $exec
 ...
@@ -198,6 +199,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr48, $vgpr49, $vgpr50
 
+    ; GFX1170-LABEL: name: test_swmmac_f32_16x16x32_bf16_D0_overlaps_Index1
+    ; GFX1170: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr48, $vgpr49, $vgpr50
+    ; GFX1170-NEXT: {{  $}}
+    ; GFX1170-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GFX1170-NEXT: early-clobber renamable $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 = V_SWMMAC_F32_16X16X32_BF16_w32_twoaddr 8, killed $vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr0, 0, 0, 0, implicit $exec
+    ;
     ; GFX12-LABEL: name: test_swmmac_f32_16x16x32_bf16_D0_overlaps_Index1
     ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr48, $vgpr49, $vgpr50
     ; GFX12-NEXT: {{  $}}
@@ -214,12 +221,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46
 
-    ; GFX12-LABEL: name: test_swmmac_f16_16x16x32_f16_D0_overlaps_A1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr40_vgpr41_vgpr42_vgpr43 = V_SWMMAC_F16_16X16X32_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43, killed $vgpr44, 0, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_swmmac_f16_16x16x32_f16_D0_overlaps_A1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr40_vgpr41_vgpr42_vgpr43 = V_SWMMAC_F16_16X16X32_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43, killed $vgpr44, 0, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
     early-clobber renamable $vgpr40_vgpr41_vgpr42_vgpr43 = V_SWMMAC_F16_16X16X32_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43, killed $vgpr44, 0, 0, 0, implicit $exec
 ...
@@ -230,12 +237,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46
 
-    ; GFX12-LABEL: name: test_swmmac_bf16_16x16x32_bf16_D0_overlaps_B1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr40_vgpr41_vgpr42_vgpr43 = V_SWMMAC_BF16_16X16X32_BF16_w32_twoaddr 8, killed $vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr40_vgpr41_vgpr42_vgpr43, killed $vgpr44, 0, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_swmmac_bf16_16x16x32_bf16_D0_overlaps_B1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr40_vgpr41_vgpr42_vgpr43 = V_SWMMAC_BF16_16X16X32_BF16_w32_twoaddr 8, killed $vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr40_vgpr41_vgpr42_vgpr43, killed $vgpr44, 0, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
     early-clobber renamable $vgpr40_vgpr41_vgpr42_vgpr43 = V_SWMMAC_BF16_16X16X32_BF16_w32_twoaddr 8, killed $vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr40_vgpr41_vgpr42_vgpr43, killed $vgpr44, 0, 0, 0, implicit $exec
 ...
@@ -246,11 +253,11 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44
 
-    ; GFX12-LABEL: name: test_swmmac_i32_16x16x32_iu8_D0_overlaps_C1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_SWMMAC_I32_16X16X32_IU8_w32_twoaddr 8, killed $vgpr28_vgpr29, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr42, 0, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_swmmac_i32_16x16x32_iu8_D0_overlaps_C1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_SWMMAC_I32_16X16X32_IU8_w32_twoaddr 8, killed $vgpr28_vgpr29, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr42, 0, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_SWMMAC_I32_16X16X32_IU8_w32_twoaddr 8, killed $vgpr28_vgpr29, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr42, 0, 0, 0, implicit $exec
 ...
@@ -261,6 +268,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
 
+    ; GFX1170-LABEL: name: test_swmmac_i32_16x16x32_iu4_D0_overlaps_Index1
+    ; GFX1170: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
+    ; GFX1170-NEXT: {{  $}}
+    ; GFX1170-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GFX1170-NEXT: early-clobber renamable $vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38 = V_SWMMAC_I32_16X16X32_IU4_w32_twoaddr 8, killed $vgpr28, 8, killed $vgpr28_vgpr29, killed $vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38, killed $vgpr0, 0, 0, 0, implicit $exec
+    ;
     ; GFX12-LABEL: name: test_swmmac_i32_16x16x32_iu4_D0_overlaps_Index1
     ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41
     ; GFX12-NEXT: {{  $}}
@@ -277,12 +290,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44
 
-    ; GFX12-LABEL: name: test_swmmac_i32_16x16x64_iu4_D0_overlaps_A1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 = V_SWMMAC_I32_16X16X64_IU4_w32_twoaddr 8, killed $vgpr0_vgpr1, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, killed $vgpr42, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_swmmac_i32_16x16x64_iu4_D0_overlaps_A1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 = V_SWMMAC_I32_16X16X64_IU4_w32_twoaddr 8, killed $vgpr0_vgpr1, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, killed $vgpr42, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
     early-clobber renamable $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 = V_SWMMAC_I32_16X16X64_IU4_w32_twoaddr 8, killed $vgpr0_vgpr1, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, killed $vgpr42, 0, 0, implicit $exec
 ...
@@ -293,12 +306,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44
 
-    ; GFX12-LABEL: name: test_swmmac_f32_16x16x32_fp8_fp8_D0_overlaps_B1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 = V_SWMMAC_F32_16X16X32_FP8_FP8_w32_twoaddr killed $vgpr28_vgpr29, killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, killed $vgpr42, 0, implicit $exec
+    ; GCN-LABEL: name: test_swmmac_f32_16x16x32_fp8_fp8_D0_overlaps_B1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 = V_SWMMAC_F32_16X16X32_FP8_FP8_w32_twoaddr killed $vgpr28_vgpr29, killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, killed $vgpr42, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
     early-clobber renamable $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 = V_SWMMAC_F32_16X16X32_FP8_FP8_w32_twoaddr killed $vgpr28_vgpr29, killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, killed $vgpr42, 0, implicit $exec
 ...
@@ -309,11 +322,11 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44
 
-    ; GFX12-LABEL: name: test_swmmac_f32_16x16x32_fp8_bf8_D0_overlaps_C1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_SWMMAC_F32_16X16X32_FP8_BF8_w32_twoaddr killed $vgpr28_vgpr29, killed $vgpr30_vgpr31_vgpr32_vgpr33, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr42, 0, implicit $exec
+    ; GCN-LABEL: name: test_swmmac_f32_16x16x32_fp8_bf8_D0_overlaps_C1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_SWMMAC_F32_16X16X32_FP8_BF8_w32_twoaddr killed $vgpr28_vgpr29, killed $vgpr30_vgpr31_vgpr32_vgpr33, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr42, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_SWMMAC_F32_16X16X32_FP8_BF8_w32_twoaddr killed $vgpr28_vgpr29, killed $vgpr30_vgpr31_vgpr32_vgpr33, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr42, 0, implicit $exec
 ...
@@ -324,6 +337,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44
 
+    ; GFX1170-LABEL: name: test_swmmac_f32_16x16x32_bf8_fp8_D0_overlaps_Index1
+    ; GFX1170: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44
+    ; GFX1170-NEXT: {{  $}}
+    ; GFX1170-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GFX1170-NEXT: early-clobber renamable $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 = V_SWMMAC_F32_16X16X32_BF8_FP8_w32_twoaddr killed $vgpr28_vgpr29, killed $vgpr30_vgpr31_vgpr32_vgpr33, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, killed $vgpr0, 0, implicit $exec
+    ;
     ; GFX12-LABEL: name: test_swmmac_f32_16x16x32_bf8_fp8_D0_overlaps_Index1
     ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44
     ; GFX12-NEXT: {{  $}}
@@ -340,12 +359,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44
 
-    ; GFX12-LABEL: name: test_swmmac_f32_16x16x32_bf8_bf8_D0_overlaps_A1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 = V_SWMMAC_F32_16X16X32_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1, killed $vgpr30_vgpr31_vgpr32_vgpr33, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, killed $vgpr42, 0, implicit $exec
+    ; GCN-LABEL: name: test_swmmac_f32_16x16x32_bf8_bf8_D0_overlaps_A1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 = V_SWMMAC_F32_16X16X32_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1, killed $vgpr30_vgpr31_vgpr32_vgpr33, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, killed $vgpr42, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec
     early-clobber renamable $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 = V_SWMMAC_F32_16X16X32_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1, killed $vgpr30_vgpr31_vgpr32_vgpr33, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, killed $vgpr42, 0, implicit $exec
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w64.mir b/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w64.mir
index 4073964e2b038..0a80543b9977d 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w64.mir
+++ b/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx12-w64.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=+wavefrontsize64 -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GFX12 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -verify-machineinstrs -mattr=+wavefrontsize64 -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX1170 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=+wavefrontsize64 -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX12 %s
 
 # D0 overlaps A1, B1, C1 or Index1. Overlap starts at vgpr0.
 #  $D0 = wmma0 $A0, $B0, $C0 or $D0 = swmmac0 $A0, $B0, $C0, $Index0
@@ -11,12 +12,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
 
-    ; GFX12-LABEL: name: test_wmma_f32_16x16x16_f16_D0_overlaps_A1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr0_vgpr1, 8, killed $vgpr18_vgpr19, 8, killed $vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_f32_16x16x16_f16_D0_overlaps_A1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr0_vgpr1, 8, killed $vgpr18_vgpr19, 8, killed $vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr0_vgpr1, 8, killed $vgpr18_vgpr19, 8, killed $vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, implicit $exec
 ...
@@ -27,12 +28,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
 
-    ; GFX12-LABEL: name: test_wmma_f32_16x16x16_bf16_D0_overlaps_B1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X16_BF16_w64_twoaddr 8, killed $vgpr16_vgpr17, 8, killed $vgpr0_vgpr1, 8, killed $vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_f32_16x16x16_bf16_D0_overlaps_B1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X16_BF16_w64_twoaddr 8, killed $vgpr16_vgpr17, 8, killed $vgpr0_vgpr1, 8, killed $vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X16_BF16_w64_twoaddr 8, killed $vgpr16_vgpr17, 8, killed $vgpr0_vgpr1, 8, killed $vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, implicit $exec
 ...
@@ -43,11 +44,11 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
 
-    ; GFX12-LABEL: name: test_wmma_f16_16x16x16_f16_D0_overlaps_C1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1 = V_WMMA_F16_16X16X16_F16_w64_twoaddr 8, killed $vgpr16_vgpr17, 8, killed $vgpr18_vgpr19, 8, killed $vgpr0_vgpr1, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_f16_16x16x16_f16_D0_overlaps_C1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1 = V_WMMA_F16_16X16X16_F16_w64_twoaddr 8, killed $vgpr16_vgpr17, 8, killed $vgpr18_vgpr19, 8, killed $vgpr0_vgpr1, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1 = V_WMMA_F16_16X16X16_F16_w64_twoaddr 8, killed $vgpr16_vgpr17, 8, killed $vgpr18_vgpr19, 8, killed $vgpr0_vgpr1, 0, 0, implicit $exec
 ...
@@ -58,12 +59,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
 
-    ; GFX12-LABEL: name: test_wmma_bf16_16x16x16_bf16_D0_overlaps_A1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr20_vgpr21 = V_WMMA_BF16_16X16X16_BF16_w64_twoaddr 8, killed $vgpr0_vgpr1, 8, killed $vgpr18_vgpr19, 8, killed $vgpr20_vgpr21, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_bf16_16x16x16_bf16_D0_overlaps_A1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr20_vgpr21 = V_WMMA_BF16_16X16X16_BF16_w64_twoaddr 8, killed $vgpr0_vgpr1, 8, killed $vgpr18_vgpr19, 8, killed $vgpr20_vgpr21, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr20_vgpr21 = V_WMMA_BF16_16X16X16_BF16_w64_twoaddr 8, killed $vgpr0_vgpr1, 8, killed $vgpr18_vgpr19, 8, killed $vgpr20_vgpr21, 0, 0, implicit $exec
 ...
@@ -74,12 +75,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
 
-    ; GFX12-LABEL: name: test_wmma_i32_16x16x16_iu8_D0_overlaps_B1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21 = V_WMMA_I32_16X16X16_IU8_w64_twoaddr 8, killed $vgpr16, 8, killed $vgpr0, killed $vgpr18_vgpr19_vgpr20_vgpr21, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_i32_16x16x16_iu8_D0_overlaps_B1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21 = V_WMMA_I32_16X16X16_IU8_w64_twoaddr 8, killed $vgpr16, 8, killed $vgpr0, killed $vgpr18_vgpr19_vgpr20_vgpr21, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21 = V_WMMA_I32_16X16X16_IU8_w64_twoaddr 8, killed $vgpr16, 8, killed $vgpr0, killed $vgpr18_vgpr19_vgpr20_vgpr21, 0, 0, implicit $exec
 ...
@@ -90,11 +91,11 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
 
-    ; GFX12-LABEL: name: test_wmma_i32_16x16x16_iu4_D0_overlaps_C1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_I32_16X16X16_IU4_w64_twoaddr 8, killed $vgpr16, 8, killed $vgpr17, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_i32_16x16x16_iu4_D0_overlaps_C1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_I32_16X16X16_IU4_w64_twoaddr 8, killed $vgpr16, 8, killed $vgpr17, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_I32_16X16X16_IU4_w64_twoaddr 8, killed $vgpr16, 8, killed $vgpr17, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
 ...
@@ -105,12 +106,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
 
-    ; GFX12-LABEL: name: test_wmma_f32_16x16x16_fp8_fp8_D0_overlaps_A1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21 = V_WMMA_F32_16X16X16_FP8_FP8_w64_twoaddr killed $vgpr0, killed $vgpr17, 8, killed $vgpr18_vgpr19_vgpr20_vgpr21, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_f32_16x16x16_fp8_fp8_D0_overlaps_A1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21 = V_WMMA_F32_16X16X16_FP8_FP8_w64_twoaddr killed $vgpr0, killed $vgpr17, 8, killed $vgpr18_vgpr19_vgpr20_vgpr21, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21 = V_WMMA_F32_16X16X16_FP8_FP8_w64_twoaddr killed $vgpr0, killed $vgpr17, 8, killed $vgpr18_vgpr19_vgpr20_vgpr21, 0, 0, implicit $exec
 ...
@@ -121,12 +122,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
 
-    ; GFX12-LABEL: name: test_wmma_f32_16x16x16_bf8_fp8_D0_overlaps_B1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21 = V_WMMA_F32_16X16X16_BF8_FP8_w64_twoaddr killed $vgpr16, killed $vgpr0, 8, killed $vgpr18_vgpr19_vgpr20_vgpr21, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_f32_16x16x16_bf8_fp8_D0_overlaps_B1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21 = V_WMMA_F32_16X16X16_BF8_FP8_w64_twoaddr killed $vgpr16, killed $vgpr0, 8, killed $vgpr18_vgpr19_vgpr20_vgpr21, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21 = V_WMMA_F32_16X16X16_BF8_FP8_w64_twoaddr killed $vgpr16, killed $vgpr0, 8, killed $vgpr18_vgpr19_vgpr20_vgpr21, 0, 0, implicit $exec
 ...
@@ -137,11 +138,11 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
 
-    ; GFX12-LABEL: name: test_wmma_f32_16x16x16_fp8_bf8_D0_overlaps_C1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_FP8_BF8_w64_twoaddr killed $vgpr16, killed $vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_f32_16x16x16_fp8_bf8_D0_overlaps_C1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_FP8_BF8_w64_twoaddr killed $vgpr16, killed $vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_FP8_BF8_w64_twoaddr killed $vgpr16, killed $vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
 ...
@@ -152,12 +153,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
 
-    ; GFX12-LABEL: name: test_wmma_f32_16x16x16_bf8_bf8_D0_overlaps_A1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21 = V_WMMA_F32_16X16X16_BF8_BF8_w64_twoaddr killed $vgpr0, killed $vgpr17, 8, killed $vgpr18_vgpr19_vgpr20_vgpr21, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_f32_16x16x16_bf8_bf8_D0_overlaps_A1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21 = V_WMMA_F32_16X16X16_BF8_BF8_w64_twoaddr killed $vgpr0, killed $vgpr17, 8, killed $vgpr18_vgpr19_vgpr20_vgpr21, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21 = V_WMMA_F32_16X16X16_BF8_BF8_w64_twoaddr killed $vgpr0, killed $vgpr17, 8, killed $vgpr18_vgpr19_vgpr20_vgpr21, 0, 0, implicit $exec
 ...
@@ -168,12 +169,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
 
-    ; GFX12-LABEL: name: test_wmma_i32_16x16x32_iu4_D0_overlaps_B1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21 = V_WMMA_I32_16X16X32_IU4_w64_twoaddr 8, killed $vgpr16, 8, killed $vgpr0, killed $vgpr18_vgpr19_vgpr20_vgpr21, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_wmma_i32_16x16x32_iu4_D0_overlaps_B1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21 = V_WMMA_I32_16X16X32_IU4_w64_twoaddr 8, killed $vgpr16, 8, killed $vgpr0, killed $vgpr18_vgpr19_vgpr20_vgpr21, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21 = V_WMMA_I32_16X16X32_IU4_w64_twoaddr 8, killed $vgpr16, 8, killed $vgpr0, killed $vgpr18_vgpr19_vgpr20_vgpr21, 0, 0, implicit $exec
 ...
@@ -184,11 +185,11 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28
 
-    ; GFX12-LABEL: name: test_swmmac_f32_16x16x32_f16_D0_overlaps_C1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_SWMMAC_F32_16X16X32_F16_w64_twoaddr 8, killed $vgpr16_vgpr17, 8, killed $vgpr18_vgpr19_vgpr20_vgpr21, killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr26, 0, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_swmmac_f32_16x16x32_f16_D0_overlaps_C1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_SWMMAC_F32_16X16X32_F16_w64_twoaddr 8, killed $vgpr16_vgpr17, 8, killed $vgpr18_vgpr19_vgpr20_vgpr21, killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr26, 0, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_SWMMAC_F32_16X16X32_F16_w64_twoaddr 8, killed $vgpr16_vgpr17, 8, killed $vgpr18_vgpr19_vgpr20_vgpr21, killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr26, 0, 0, 0, implicit $exec
 ...
@@ -199,6 +200,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28
 
+    ; GFX1170-LABEL: name: test_swmmac_f32_16x16x32_bf16_D0_overlaps_Index1
+    ; GFX1170: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28
+    ; GFX1170-NEXT: {{  $}}
+    ; GFX1170-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GFX1170-NEXT: early-clobber renamable $vgpr22_vgpr23_vgpr24_vgpr25 = V_SWMMAC_F32_16X16X32_BF16_w64_twoaddr 8, killed $vgpr16_vgpr17, 8, killed $vgpr18_vgpr19_vgpr20_vgpr21, killed $vgpr22_vgpr23_vgpr24_vgpr25, killed $vgpr0, 0, 0, 0, implicit $exec
+    ;
     ; GFX12-LABEL: name: test_swmmac_f32_16x16x32_bf16_D0_overlaps_Index1
     ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28
     ; GFX12-NEXT: {{  $}}
@@ -215,12 +222,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26
 
-    ; GFX12-LABEL: name: test_swmmac_f16_16x16x32_f16_D0_overlaps_A1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr22_vgpr23 = V_SWMMAC_F16_16X16X32_F16_w64_twoaddr 8, killed $vgpr0_vgpr1, 8, killed $vgpr18_vgpr19_vgpr20_vgpr21, killed $vgpr22_vgpr23, killed $vgpr24, 0, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_swmmac_f16_16x16x32_f16_D0_overlaps_A1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr22_vgpr23 = V_SWMMAC_F16_16X16X32_F16_w64_twoaddr 8, killed $vgpr0_vgpr1, 8, killed $vgpr18_vgpr19_vgpr20_vgpr21, killed $vgpr22_vgpr23, killed $vgpr24, 0, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr22_vgpr23 = V_SWMMAC_F16_16X16X32_F16_w64_twoaddr 8, killed $vgpr0_vgpr1, 8, killed $vgpr18_vgpr19_vgpr20_vgpr21, killed $vgpr22_vgpr23, killed $vgpr24, 0, 0, 0, implicit $exec
 ...
@@ -231,12 +238,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26
 
-    ; GFX12-LABEL: name: test_swmmac_bf16_16x16x32_bf16_D0_overlaps_B1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr22_vgpr23 = V_SWMMAC_BF16_16X16X32_BF16_w64_twoaddr 8, killed $vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr22_vgpr23, killed $vgpr24, 0, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_swmmac_bf16_16x16x32_bf16_D0_overlaps_B1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr22_vgpr23 = V_SWMMAC_BF16_16X16X32_BF16_w64_twoaddr 8, killed $vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr22_vgpr23, killed $vgpr24, 0, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr22_vgpr23 = V_SWMMAC_BF16_16X16X32_BF16_w64_twoaddr 8, killed $vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr22_vgpr23, killed $vgpr24, 0, 0, 0, implicit $exec
 ...
@@ -247,11 +254,11 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
 
-    ; GFX12-LABEL: name: test_swmmac_i32_16x16x32_iu8_D0_overlaps_C1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_SWMMAC_I32_16X16X32_IU8_w64_twoaddr 8, killed $vgpr16, 8, killed $vgpr17_vgpr18, killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr23, 0, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_swmmac_i32_16x16x32_iu8_D0_overlaps_C1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_SWMMAC_I32_16X16X32_IU8_w64_twoaddr 8, killed $vgpr16, 8, killed $vgpr17_vgpr18, killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr23, 0, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_SWMMAC_I32_16X16X32_IU8_w64_twoaddr 8, killed $vgpr16, 8, killed $vgpr17_vgpr18, killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr23, 0, 0, 0, implicit $exec
 ...
@@ -262,6 +269,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24
 
+    ; GFX1170-LABEL: name: test_swmmac_i32_16x16x32_iu4_D0_overlaps_Index1
+    ; GFX1170: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24
+    ; GFX1170-NEXT: {{  $}}
+    ; GFX1170-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GFX1170-NEXT: early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21 = V_SWMMAC_I32_16X16X32_IU4_w64_twoaddr 8, killed $vgpr16, 8, killed $vgpr16, killed $vgpr18_vgpr19_vgpr20_vgpr21, killed $vgpr0, 0, 0, 0, implicit $exec
+    ;
     ; GFX12-LABEL: name: test_swmmac_i32_16x16x32_iu4_D0_overlaps_Index1
     ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24
     ; GFX12-NEXT: {{  $}}
@@ -278,12 +291,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
 
-    ; GFX12-LABEL: name: test_swmmac_i32_16x16x64_iu4_D0_overlaps_A1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr19_vgpr20_vgpr21_vgpr22 = V_SWMMAC_I32_16X16X64_IU4_w64_twoaddr 8, killed $vgpr0, 8, killed $vgpr17_vgpr18, killed $vgpr19_vgpr20_vgpr21_vgpr22, killed $vgpr23, 0, 0, 0, implicit $exec
+    ; GCN-LABEL: name: test_swmmac_i32_16x16x64_iu4_D0_overlaps_A1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr19_vgpr20_vgpr21_vgpr22 = V_SWMMAC_I32_16X16X64_IU4_w64_twoaddr 8, killed $vgpr0, 8, killed $vgpr17_vgpr18, killed $vgpr19_vgpr20_vgpr21_vgpr22, killed $vgpr23, 0, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr19_vgpr20_vgpr21_vgpr22 = V_SWMMAC_I32_16X16X64_IU4_w64_twoaddr 8, killed $vgpr0, 8, killed $vgpr17_vgpr18, killed $vgpr19_vgpr20_vgpr21_vgpr22, killed $vgpr23, 0, 0, 0, implicit $exec
 ...
@@ -294,12 +307,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
 
-    ; GFX12-LABEL: name: test_swmmac_f32_16x16x32_fp8_fp8_D0_overlaps_B1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr19_vgpr20_vgpr21_vgpr22 = V_SWMMAC_F32_16X16X32_FP8_FP8_w64_twoaddr killed $vgpr16, killed $vgpr0_vgpr1, killed $vgpr19_vgpr20_vgpr21_vgpr22, killed $vgpr23, 0, implicit $exec
+    ; GCN-LABEL: name: test_swmmac_f32_16x16x32_fp8_fp8_D0_overlaps_B1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr19_vgpr20_vgpr21_vgpr22 = V_SWMMAC_F32_16X16X32_FP8_FP8_w64_twoaddr killed $vgpr16, killed $vgpr0_vgpr1, killed $vgpr19_vgpr20_vgpr21_vgpr22, killed $vgpr23, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr19_vgpr20_vgpr21_vgpr22 = V_SWMMAC_F32_16X16X32_FP8_FP8_w64_twoaddr killed $vgpr16, killed $vgpr0_vgpr1, killed $vgpr19_vgpr20_vgpr21_vgpr22, killed $vgpr23, 0, implicit $exec
 ...
@@ -310,11 +323,11 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
 
-    ; GFX12-LABEL: name: test_swmmac_f32_16x16x32_fp8_bf8_D0_overlaps_C1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_SWMMAC_F32_16X16X32_FP8_BF8_w64_twoaddr killed $vgpr16, killed $vgpr17_vgpr18, killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr23, 0, implicit $exec
+    ; GCN-LABEL: name: test_swmmac_f32_16x16x32_fp8_bf8_D0_overlaps_C1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_SWMMAC_F32_16X16X32_FP8_BF8_w64_twoaddr killed $vgpr16, killed $vgpr17_vgpr18, killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr23, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_SWMMAC_F32_16X16X32_FP8_BF8_w64_twoaddr killed $vgpr16, killed $vgpr17_vgpr18, killed $vgpr0_vgpr1_vgpr2_vgpr3, killed $vgpr23, 0, implicit $exec
 ...
@@ -325,6 +338,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
 
+    ; GFX1170-LABEL: name: test_swmmac_f32_16x16x32_bf8_fp8_D0_overlaps_Index1
+    ; GFX1170: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
+    ; GFX1170-NEXT: {{  $}}
+    ; GFX1170-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GFX1170-NEXT: early-clobber renamable $vgpr19_vgpr20_vgpr21_vgpr22 = V_SWMMAC_F32_16X16X32_BF8_FP8_w64_twoaddr killed $vgpr16, killed $vgpr17_vgpr18, killed $vgpr19_vgpr20_vgpr21_vgpr22, killed $vgpr0, 0, implicit $exec
+    ;
     ; GFX12-LABEL: name: test_swmmac_f32_16x16x32_bf8_fp8_D0_overlaps_Index1
     ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
     ; GFX12-NEXT: {{  $}}
@@ -341,12 +360,12 @@ body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
 
-    ; GFX12-LABEL: name: test_swmmac_f32_16x16x32_bf8_bf8_D0_overlaps_A1
-    ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
-    ; GFX12-NEXT: {{  $}}
-    ; GFX12-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
-    ; GFX12-NEXT: V_NOP_e32 implicit $exec
-    ; GFX12-NEXT: early-clobber renamable $vgpr19_vgpr20_vgpr21_vgpr22 = V_SWMMAC_F32_16X16X32_BF8_BF8_w64_twoaddr killed $vgpr0, killed $vgpr17_vgpr18, killed $vgpr19_vgpr20_vgpr21_vgpr22, killed $vgpr23, 0, implicit $exec
+    ; GCN-LABEL: name: test_swmmac_f32_16x16x32_bf8_bf8_D0_overlaps_A1
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
+    ; GCN-NEXT: V_NOP_e32 implicit $exec
+    ; GCN-NEXT: early-clobber renamable $vgpr19_vgpr20_vgpr21_vgpr22 = V_SWMMAC_F32_16X16X32_BF8_BF8_w64_twoaddr killed $vgpr0, killed $vgpr17_vgpr18, killed $vgpr19_vgpr20_vgpr21_vgpr22, killed $vgpr23, 0, implicit $exec
     early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_F32_16X16X16_F16_w64_twoaddr 8, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec
     early-clobber renamable $vgpr19_vgpr20_vgpr21_vgpr22 = V_SWMMAC_F32_16X16X32_BF8_BF8_w64_twoaddr killed $vgpr0, killed $vgpr17_vgpr18, killed $vgpr19_vgpr20_vgpr21_vgpr22, killed $vgpr23, 0, implicit $exec
 ...
diff --git a/llvm/test/MC/AMDGPU/gfx1170_asm_wmma_w32.s b/llvm/test/MC/AMDGPU/gfx1170_asm_wmma_w32.s
new file mode 100644
index 0000000000000..abdb344ac0614
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1170_asm_wmma_w32.s
@@ -0,0 +1,1529 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1170 -show-encoding %s | FileCheck --check-prefix=GFX1170 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1170 %s 2>&1 | FileCheck --check-prefix=GFX1170-ERR --implicit-check-not=error: %s
+
+v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15]
+// GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] ; encoding: [0x08,0x40,0x40,0xcc,0x00,0x09,0x22,0x1c]
+
+v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,0,0]
+// GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,0,0] ; encoding: [0x08,0x40,0x40,0xcc,0x00,0x09,0x22,0x3c]
+
+v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,1,0]
+// GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,1,0] ; encoding: [0x08,0x40,0x40,0xcc,0x00,0x09,0x22,0x5c]
+
+v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1] ; encoding: [0x08,0x40,0x40,0xcc,0x00,0x09,0x22,0x9c]
+
+v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[1,0,0]
+// GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[1,0,0] ; encoding: [0x08,0x41,0x40,0xcc,0x00,0x09,0x22,0x1c]
+
+v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[0,1,0]
+// GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[0,1,0] ; encoding: [0x08,0x42,0x40,0xcc,0x00,0x09,0x22,0x1c]
+
+v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[0,0,1] ; encoding: [0x08,0x44,0x40,0xcc,0x00,0x09,0x22,0x1c]
+
+v_wmma_f32_16x16x16_f16 v[8:15], s[0:3], v[4:7], v[8:15]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], s[4:7], v[8:15]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], s[8:15]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_f16 v[8:15], 1.0, v[4:7], v[8:15]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], 1.0, v[8:15]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], 1.0
+// GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], 1.0 ; encoding: [0x08,0x40,0x40,0xcc,0x00,0x09,0xca,0x1b]
+
+v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], 1
+// GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], 1 ; encoding: [0x08,0x40,0x40,0xcc,0x00,0x09,0x06,0x1a]
+
+
+
+v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15]
+// GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] ; encoding: [0x08,0x40,0x41,0xcc,0x00,0x09,0x22,0x1c]
+
+v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,0,0]
+// GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,0,0] ; encoding: [0x08,0x40,0x41,0xcc,0x00,0x09,0x22,0x3c]
+
+v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,1,0]
+// GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,1,0] ; encoding: [0x08,0x40,0x41,0xcc,0x00,0x09,0x22,0x5c]
+
+v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1] ; encoding: [0x08,0x40,0x41,0xcc,0x00,0x09,0x22,0x9c]
+
+v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[1,0,0]
+// GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[1,0,0] ; encoding: [0x08,0x41,0x41,0xcc,0x00,0x09,0x22,0x1c]
+
+v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[0,1,0]
+// GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[0,1,0] ; encoding: [0x08,0x42,0x41,0xcc,0x00,0x09,0x22,0x1c]
+
+v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[0,0,1] ; encoding: [0x08,0x44,0x41,0xcc,0x00,0x09,0x22,0x1c]
+
+v_wmma_f32_16x16x16_bf16 v[8:15], s[0:3], v[4:7], v[8:15]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], s[4:7], v[8:15]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], s[8:15]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf16 v[8:15], 1.0, v[4:7], v[8:15]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], 1.0, v[8:15]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], 1.0
+// GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], 1.0 ; encoding: [0x08,0x40,0x41,0xcc,0x00,0x09,0xca,0x1b]
+
+v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], 1
+// GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], 1 ; encoding: [0x08,0x40,0x41,0xcc,0x00,0x09,0x06,0x1a]
+
+
+
+v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11]
+// GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] ; encoding: [0x08,0x40,0x42,0xcc,0x00,0x09,0x22,0x1c]
+
+v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,0,0]
+// GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,0,0] ; encoding: [0x08,0x40,0x42,0xcc,0x00,0x09,0x22,0x3c]
+
+v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,1,0]
+// GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,1,0] ; encoding: [0x08,0x40,0x42,0xcc,0x00,0x09,0x22,0x5c]
+
+v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,0,1]
+// GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,0,1] ; encoding: [0x08,0x40,0x42,0xcc,0x00,0x09,0x22,0x9c]
+
+v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[1,0,0]
+// GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[1,0,0] ; encoding: [0x08,0x41,0x42,0xcc,0x00,0x09,0x22,0x1c]
+
+v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[0,1,0]
+// GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[0,1,0] ; encoding: [0x08,0x42,0x42,0xcc,0x00,0x09,0x22,0x1c]
+
+v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[0,0,1]
+// GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[0,0,1] ; encoding: [0x08,0x44,0x42,0xcc,0x00,0x09,0x22,0x1c]
+
+v_wmma_f16_16x16x16_f16 v[8:11], s[0:3], v[4:7], v[8:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], s[4:7], v[8:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], s[8:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x16_f16 v[8:11], 1.0, v[4:7], v[8:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], 1.0, v[8:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], 1.0
+// GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], 1.0 ; encoding: [0x08,0x40,0x42,0xcc,0x00,0x09,0xca,0x1b]
+
+v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], 1
+// GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], 1 ; encoding: [0x08,0x40,0x42,0xcc,0x00,0x09,0x06,0x1a]
+
+
+
+v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11]
+// GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] ; encoding: [0x08,0x40,0x43,0xcc,0x00,0x09,0x22,0x1c]
+
+v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,0,0]
+// GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,0,0] ; encoding: [0x08,0x40,0x43,0xcc,0x00,0x09,0x22,0x3c]
+
+v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,1,0]
+// GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,1,0] ; encoding: [0x08,0x40,0x43,0xcc,0x00,0x09,0x22,0x5c]
+
+v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,0,1]
+// GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,0,1] ; encoding: [0x08,0x40,0x43,0xcc,0x00,0x09,0x22,0x9c]
+
+v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[1,0,0]
+// GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[1,0,0] ; encoding: [0x08,0x41,0x43,0xcc,0x00,0x09,0x22,0x1c]
+
+v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[0,1,0]
+// GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[0,1,0] ; encoding: [0x08,0x42,0x43,0xcc,0x00,0x09,0x22,0x1c]
+
+v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[0,0,1]
+// GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[0,0,1] ; encoding: [0x08,0x44,0x43,0xcc,0x00,0x09,0x22,0x1c]
+
+v_wmma_bf16_16x16x16_bf16 v[8:11], s[0:3], v[4:7], v[8:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], s[4:7], v[8:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], s[8:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_bf16_16x16x16_bf16 v[8:11], 1.0, v[4:7], v[8:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], 1.0, v[8:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], 1.0
+// GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], 1.0 ; encoding: [0x08,0x40,0x43,0xcc,0x00,0x09,0xca,0x1b]
+
+v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], 1
+// GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], 1 ; encoding: [0x08,0x40,0x43,0xcc,0x00,0x09,0x06,0x1a]
+
+
+
+v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11]
+// GFX1170: v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] ; encoding: [0x04,0x40,0x44,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] clamp
+// GFX1170: v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] clamp ; encoding: [0x04,0xc0,0x44,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0]
+// GFX1170: v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0] ; encoding: [0x04,0x40,0x44,0xcc,0x00,0x05,0x12,0x3c]
+
+v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0]
+// GFX1170: v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0] ; encoding: [0x04,0x40,0x44,0xcc,0x00,0x05,0x12,0x5c]
+
+v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu8 v[4:11], s[0:1], v[2:3], v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], s[2:3], v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], s[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x16_iu8 v[4:11], 1, v[2:3], v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], 1, v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], 1
+// GFX1170: v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], 1 ; encoding: [0x04,0x40,0x44,0xcc,0x00,0x05,0x06,0x1a]
+
+v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], 1.0
+// GFX1170: v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x40,0x44,0xcc,0x00,0x05,0xca,0x1b]
+
+
+
+v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9]
+// GFX1170: v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x1c]
+
+v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] clamp
+// GFX1170: v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] clamp ; encoding: [0x02,0xc0,0x45,0xcc,0x00,0x03,0x0a,0x1c]
+
+v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] neg_lo:[1,0,0]
+// GFX1170: v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] neg_lo:[1,0,0] ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x3c]
+
+v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] neg_lo:[0,1,0]
+// GFX1170: v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] neg_lo:[0,1,0] ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x5c]
+
+v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu4 v[2:9], s0, v1, v[2:9]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x16_iu4 v[2:9], v0, s1, v[2:9]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, s[0:7]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x16_iu4 v[2:9], 1, v1, v[2:9]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x16_iu4 v[2:9], v0, 1, v[2:9]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, 1
+// GFX1170: v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, 1 ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0x06,0x1a]
+
+v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, 1.0
+// GFX1170: v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, 1.0 ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0xca,0x1b]
+
+
+
+v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11]
+// GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] ; encoding: [0x04,0x40,0x46,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x46,0xcc,0x00,0x05,0x12,0x9c]
+
+v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x46,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_f32_16x16x16_fp8_fp8 v[4:11], s[0:1], v[2:3], v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], s[2:3], v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], s[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_fp8 v[4:11], 1.0, v[2:3], v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], 1.0, v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], 1.0
+// GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x40,0x46,0xcc,0x00,0x05,0xca,0x1b]
+
+v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], 1
+// GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], 1 ; encoding: [0x04,0x40,0x46,0xcc,0x00,0x05,0x06,0x1a]
+
+
+
+v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11]
+// GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] ; encoding: [0x04,0x40,0x48,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x48,0xcc,0x00,0x05,0x12,0x9c]
+
+v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x48,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_f32_16x16x16_bf8_fp8 v[4:11], s[0:1], v[2:3], v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], s[2:3], v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], s[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_fp8 v[4:11], 1.0, v[2:3], v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], 1.0, v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], 1.0
+// GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x40,0x48,0xcc,0x00,0x05,0xca,0x1b]
+
+v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], 1
+// GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], 1 ; encoding: [0x04,0x40,0x48,0xcc,0x00,0x05,0x06,0x1a]
+
+
+
+v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11]
+// GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] ; encoding: [0x04,0x40,0x47,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x47,0xcc,0x00,0x05,0x12,0x9c]
+
+v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x47,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_f32_16x16x16_fp8_bf8 v[4:11], s[0:1], v[2:3], v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], s[2:3], v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], s[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_bf8 v[4:11], 1.0, v[2:3], v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], 1.0, v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], 1.0
+// GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x40,0x47,0xcc,0x00,0x05,0xca,0x1b]
+
+v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], 1
+// GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], 1 ; encoding: [0x04,0x40,0x47,0xcc,0x00,0x05,0x06,0x1a]
+
+
+
+v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11]
+// GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] ; encoding: [0x04,0x40,0x49,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x49,0xcc,0x00,0x05,0x12,0x9c]
+
+v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x49,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_f32_16x16x16_bf8_bf8 v[4:11], s[0:1], v[2:3], v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], s[2:3], v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], s[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_bf8 v[4:11], 1.0, v[2:3], v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], 1.0, v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], 1.0
+// GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x40,0x49,0xcc,0x00,0x05,0xca,0x1b]
+
+v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], 1
+// GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], 1 ; encoding: [0x04,0x40,0x49,0xcc,0x00,0x05,0x06,0x1a]
+
+
+
+v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11]
+// GFX1170: v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] ; encoding: [0x04,0x40,0x4a,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] clamp
+// GFX1170: v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] clamp ; encoding: [0x04,0xc0,0x4a,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0]
+// GFX1170: v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0] ; encoding: [0x04,0x40,0x4a,0xcc,0x00,0x05,0x12,0x3c]
+
+v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0]
+// GFX1170: v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0] ; encoding: [0x04,0x40,0x4a,0xcc,0x00,0x05,0x12,0x5c]
+
+v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x32_iu4 v[4:11], s[0:1], v[2:3], v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], s[2:3], v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], s[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x32_iu4 v[4:11], 1, v[2:3], v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], 1, v[4:11]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], 1
+// GFX1170: v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], 1 ; encoding: [0x04,0x40,0x4a,0xcc,0x00,0x05,0x06,0x1a]
+
+v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], 1.0
+// GFX1170: v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x40,0x4a,0xcc,0x00,0x05,0xca,0x1b]
+
+
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20
+// GFX1170: v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 ; encoding: [0x0c,0x40,0x50,0xcc,0x00,0x09,0x52,0x1c]
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 index_key:1
+// GFX1170: v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 index_key:1 ; encoding: [0x0c,0x48,0x50,0xcc,0x00,0x09,0x52,0x1c]
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 index_key:2
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 index_key:3
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[1,0,0]
+// GFX1170: v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[1,0,0] ; encoding: [0x0c,0x40,0x50,0xcc,0x00,0x09,0x52,0x3c]
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[0,1,0]
+// GFX1170: v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[0,1,0] ; encoding: [0x0c,0x40,0x50,0xcc,0x00,0x09,0x52,0x5c]
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_hi:[1,0,0]
+// GFX1170: v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_hi:[1,0,0] ; encoding: [0x0c,0x41,0x50,0xcc,0x00,0x09,0x52,0x1c]
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_hi:[0,1,0]
+// GFX1170: v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_hi:[0,1,0] ; encoding: [0x0c,0x42,0x50,0xcc,0x00,0x09,0x52,0x1c]
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_swmmac_f32_16x16x32_f16 v[12:19], s[0:3], v[4:11], v20
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], s[4:11], v20
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], s20
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_f16 v[12:19], 1.0, v[4:11], v20
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], 1.0, v20
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20
+// GFX1170: v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 ; encoding: [0x0c,0x40,0x51,0xcc,0x00,0x09,0x52,0x1c]
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 index_key:1
+// GFX1170: v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 index_key:1 ; encoding: [0x0c,0x48,0x51,0xcc,0x00,0x09,0x52,0x1c]
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 index_key:2
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 index_key:3
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[1,0,0]
+// GFX1170: v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[1,0,0] ; encoding: [0x0c,0x40,0x51,0xcc,0x00,0x09,0x52,0x3c]
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[0,1,0]
+// GFX1170: v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[0,1,0] ; encoding: [0x0c,0x40,0x51,0xcc,0x00,0x09,0x52,0x5c]
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 neg_hi:[1,0,0]
+// GFX1170: v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 neg_hi:[1,0,0] ; encoding: [0x0c,0x41,0x51,0xcc,0x00,0x09,0x52,0x1c]
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 neg_hi:[0,1,0]
+// GFX1170: v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 neg_hi:[0,1,0] ; encoding: [0x0c,0x42,0x51,0xcc,0x00,0x09,0x52,0x1c]
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], s[0:3], v[4:11], v20
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], s[4:11], v20
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], s20
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], 1.0, v[4:11], v20
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], 1.0, v20
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16
+// GFX1170: v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 ; encoding: [0x0c,0x40,0x52,0xcc,0x00,0x09,0x42,0x1c]
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 index_key:1
+// GFX1170: v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 index_key:1 ; encoding: [0x0c,0x48,0x52,0xcc,0x00,0x09,0x42,0x1c]
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 index_key:2
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 index_key:3
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[1,0,0]
+// GFX1170: v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[1,0,0] ; encoding: [0x0c,0x40,0x52,0xcc,0x00,0x09,0x42,0x3c]
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[0,1,0]
+// GFX1170: v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[0,1,0] ; encoding: [0x0c,0x40,0x52,0xcc,0x00,0x09,0x42,0x5c]
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_hi:[1,0,0]
+// GFX1170: v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_hi:[1,0,0] ; encoding: [0x0c,0x41,0x52,0xcc,0x00,0x09,0x42,0x1c]
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_hi:[0,1,0]
+// GFX1170: v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_hi:[0,1,0] ; encoding: [0x0c,0x42,0x52,0xcc,0x00,0x09,0x42,0x1c]
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_swmmac_f16_16x16x32_f16 v[12:15], s[0:3], v[4:11], v16
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], s[4:11], v16
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], s16
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x32_f16 v[12:15], 1.0, v[4:11], v16
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], 1.0, v16
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16
+// GFX1170: v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 ; encoding: [0x0c,0x40,0x53,0xcc,0x00,0x09,0x42,0x1c]
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 index_key:1
+// GFX1170: v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 index_key:1 ; encoding: [0x0c,0x48,0x53,0xcc,0x00,0x09,0x42,0x1c]
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 index_key:2
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 index_key:3
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[1,0,0]
+// GFX1170: v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[1,0,0] ; encoding: [0x0c,0x40,0x53,0xcc,0x00,0x09,0x42,0x3c]
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[0,1,0]
+// GFX1170: v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[0,1,0] ; encoding: [0x0c,0x40,0x53,0xcc,0x00,0x09,0x42,0x5c]
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 neg_hi:[1,0,0]
+// GFX1170: v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 neg_hi:[1,0,0] ; encoding: [0x0c,0x41,0x53,0xcc,0x00,0x09,0x42,0x1c]
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 neg_hi:[0,1,0]
+// GFX1170: v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 neg_hi:[0,1,0] ; encoding: [0x0c,0x42,0x53,0xcc,0x00,0x09,0x42,0x1c]
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], s[0:3], v[4:11], v16
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], s[4:11], v16
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], s16
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], 1.0, v[4:11], v16
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], 1.0, v16
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14
+// GFX1170: v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 ; encoding: [0x06,0x40,0x54,0xcc,0x00,0x05,0x3a,0x1c]
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 clamp
+// GFX1170: v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 clamp ; encoding: [0x06,0xc0,0x54,0xcc,0x00,0x05,0x3a,0x1c]
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 index_key:1
+// GFX1170: v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 index_key:1 ; encoding: [0x06,0x48,0x54,0xcc,0x00,0x05,0x3a,0x1c]
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 index_key:2
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 index_key:3
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[1,0,0]
+// GFX1170: v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[1,0,0] ; encoding: [0x06,0x40,0x54,0xcc,0x00,0x05,0x3a,0x3c]
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,1,0]
+// GFX1170: v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,1,0] ; encoding: [0x06,0x40,0x54,0xcc,0x00,0x05,0x3a,0x5c]
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], s[0:1], v[2:5], v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], s[0:3], v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], s14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], 1, v[2:5], v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], 1, v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11
+// GFX1170: v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 ; encoding: [0x03,0x40,0x55,0xcc,0x00,0x03,0x2e,0x1c]
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 clamp
+// GFX1170: v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 clamp ; encoding: [0x03,0xc0,0x55,0xcc,0x00,0x03,0x2e,0x1c]
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 index_key:1
+// GFX1170: v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 index_key:1 ; encoding: [0x03,0x48,0x55,0xcc,0x00,0x03,0x2e,0x1c]
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 index_key:2
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 index_key:3
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 neg_lo:[1,0,0]
+// GFX1170: v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 neg_lo:[1,0,0] ; encoding: [0x03,0x40,0x55,0xcc,0x00,0x03,0x2e,0x3c]
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 neg_lo:[0,1,0]
+// GFX1170: v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 neg_lo:[0,1,0] ; encoding: [0x03,0x40,0x55,0xcc,0x00,0x03,0x2e,0x5c]
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], s0, v[1:2], v11
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, s[0:1], v11
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], s11
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], 1, v[1:2], v11
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, 1, v11
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14
+// GFX1170: v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 ; encoding: [0x06,0x40,0x56,0xcc,0x00,0x05,0x3a,0x1c]
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 clamp
+// GFX1170: v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 clamp ; encoding: [0x06,0xc0,0x56,0xcc,0x00,0x05,0x3a,0x1c]
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 index_key:1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 index_key:2
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 index_key:3
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 neg_lo:[1,0,0]
+// GFX1170: v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 neg_lo:[1,0,0] ; encoding: [0x06,0x40,0x56,0xcc,0x00,0x05,0x3a,0x3c]
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,1,0]
+// GFX1170: v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,1,0] ; encoding: [0x06,0x40,0x56,0xcc,0x00,0x05,0x3a,0x5c]
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], s[0:1], v[2:5], v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], s[0:3], v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], s14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], 1, v[2:5], v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], 1, v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14
+// GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 ; encoding: [0x06,0x40,0x57,0xcc,0x00,0x05,0x3a,0x1c]
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 index_key:1
+// GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 index_key:1 ; encoding: [0x06,0x48,0x57,0xcc,0x00,0x05,0x3a,0x1c]
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 index_key:2
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 index_key:3
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], s[0:1], v[2:5], v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], s[0:3], v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], s14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], 1.0, v[2:5], v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], 1.0, v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14
+// GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 ; encoding: [0x06,0x40,0x58,0xcc,0x00,0x05,0x3a,0x1c]
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 index_key:1
+// GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 index_key:1 ; encoding: [0x06,0x48,0x58,0xcc,0x00,0x05,0x3a,0x1c]
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 index_key:2
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 index_key:3
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], s[0:1], v[2:5], v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], s[0:3], v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], s14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], 1.0, v[2:5], v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], 1.0, v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14
+// GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 ; encoding: [0x06,0x40,0x59,0xcc,0x00,0x05,0x3a,0x1c]
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 index_key:1
+// GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 index_key:1 ; encoding: [0x06,0x48,0x59,0xcc,0x00,0x05,0x3a,0x1c]
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 index_key:2
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 index_key:3
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], s[0:1], v[2:5], v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], s[0:3], v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], s14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], 1.0, v[2:5], v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], 1.0, v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14
+// GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 ; encoding: [0x06,0x40,0x5a,0xcc,0x00,0x05,0x3a,0x1c]
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 index_key:1
+// GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 index_key:1 ; encoding: [0x06,0x48,0x5a,0xcc,0x00,0x05,0x3a,0x1c]
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 index_key:2
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 index_key:3
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], s[0:1], v[2:5], v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], s[0:3], v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], s14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], 1.0, v[2:5], v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], 1.0, v14
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/llvm/test/MC/AMDGPU/gfx1170_asm_wmma_w64.s b/llvm/test/MC/AMDGPU/gfx1170_asm_wmma_w64.s
new file mode 100644
index 0000000000000..6b1b889f8bedd
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1170_asm_wmma_w64.s
@@ -0,0 +1,1529 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1170 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX1170 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1170 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=GFX1170-ERR --implicit-check-not=error: %s
+
+v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7]
+// GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] ; encoding: [0x04,0x40,0x40,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,0,0]
+// GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,0,0] ; encoding: [0x04,0x40,0x40,0xcc,0x00,0x05,0x12,0x3c]
+
+v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,1,0]
+// GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,1,0] ; encoding: [0x04,0x40,0x40,0xcc,0x00,0x05,0x12,0x5c]
+
+v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x40,0xcc,0x00,0x05,0x12,0x9c]
+
+v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[1,0,0]
+// GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[1,0,0] ; encoding: [0x04,0x41,0x40,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[0,1,0]
+// GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[0,1,0] ; encoding: [0x04,0x42,0x40,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x40,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_f32_16x16x16_f16 v[4:7], s[0:1], v[2:3], v[4:7]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], s[2:3], v[4:7]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], s[4:7]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_f16 v[4:7], 1.0, v[2:3], v[4:7]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], 1.0, v[4:7]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], 1.0
+// GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x40,0x40,0xcc,0x00,0x05,0xca,0x1b]
+
+v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], 1
+// GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], 1 ; encoding: [0x04,0x40,0x40,0xcc,0x00,0x05,0x06,0x1a]
+
+
+
+v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7]
+// GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] ; encoding: [0x04,0x40,0x41,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,0,0]
+// GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,0,0] ; encoding: [0x04,0x40,0x41,0xcc,0x00,0x05,0x12,0x3c]
+
+v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,1,0]
+// GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,1,0] ; encoding: [0x04,0x40,0x41,0xcc,0x00,0x05,0x12,0x5c]
+
+v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x41,0xcc,0x00,0x05,0x12,0x9c]
+
+v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[1,0,0]
+// GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[1,0,0] ; encoding: [0x04,0x41,0x41,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[0,1,0]
+// GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[0,1,0] ; encoding: [0x04,0x42,0x41,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x41,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_f32_16x16x16_bf16 v[4:7], s[0:1], v[2:3], v[4:7]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], s[2:3], v[4:7]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], s[4:7]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf16 v[4:7], 1.0, v[2:3], v[4:7]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], 1.0, v[4:7]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], 1.0
+// GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x40,0x41,0xcc,0x00,0x05,0xca,0x1b]
+
+v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], 1
+// GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], 1 ; encoding: [0x04,0x40,0x41,0xcc,0x00,0x05,0x06,0x1a]
+
+
+
+v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5]
+// GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] ; encoding: [0x04,0x40,0x42,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[1,0,0]
+// GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[1,0,0] ; encoding: [0x04,0x40,0x42,0xcc,0x00,0x05,0x12,0x3c]
+
+v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,1,0]
+// GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,1,0] ; encoding: [0x04,0x40,0x42,0xcc,0x00,0x05,0x12,0x5c]
+
+v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,0,1]
+// GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x42,0xcc,0x00,0x05,0x12,0x9c]
+
+v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[1,0,0]
+// GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[1,0,0] ; encoding: [0x04,0x41,0x42,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[0,1,0]
+// GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[0,1,0] ; encoding: [0x04,0x42,0x42,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[0,0,1]
+// GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x42,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_f16_16x16x16_f16 v[4:5], s[0:1], v[2:3], v[4:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], s[2:3], v[4:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], s[4:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x16_f16 v[4:5], 1.0, v[2:3], v[4:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], 1.0, v[4:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], 1.0
+// GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x40,0x42,0xcc,0x00,0x05,0xca,0x1b]
+
+v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], 1
+// GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], 1 ; encoding: [0x04,0x40,0x42,0xcc,0x00,0x05,0x06,0x1a]
+
+
+
+v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5]
+// GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] ; encoding: [0x04,0x40,0x43,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[1,0,0]
+// GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[1,0,0] ; encoding: [0x04,0x40,0x43,0xcc,0x00,0x05,0x12,0x3c]
+
+v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,1,0]
+// GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,1,0] ; encoding: [0x04,0x40,0x43,0xcc,0x00,0x05,0x12,0x5c]
+
+v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,0,1]
+// GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x43,0xcc,0x00,0x05,0x12,0x9c]
+
+v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[1,0,0]
+// GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[1,0,0] ; encoding: [0x04,0x41,0x43,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[0,1,0]
+// GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[0,1,0] ; encoding: [0x04,0x42,0x43,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[0,0,1]
+// GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x43,0xcc,0x00,0x05,0x12,0x1c]
+
+v_wmma_bf16_16x16x16_bf16 v[4:5], s[0:1], v[2:3], v[4:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], s[2:3], v[4:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], s[4:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_bf16_16x16x16_bf16 v[4:5], 1.0, v[2:3], v[4:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], 1.0, v[4:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], 1.0
+// GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x40,0x43,0xcc,0x00,0x05,0xca,0x1b]
+
+v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], 1
+// GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], 1 ; encoding: [0x04,0x40,0x43,0xcc,0x00,0x05,0x06,0x1a]
+
+
+
+v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5]
+// GFX1170: v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] ; encoding: [0x02,0x40,0x44,0xcc,0x00,0x03,0x0a,0x1c]
+
+v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] clamp
+// GFX1170: v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] clamp ; encoding: [0x02,0xc0,0x44,0xcc,0x00,0x03,0x0a,0x1c]
+
+v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0]
+// GFX1170: v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0] ; encoding: [0x02,0x40,0x44,0xcc,0x00,0x03,0x0a,0x3c]
+
+v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0]
+// GFX1170: v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0] ; encoding: [0x02,0x40,0x44,0xcc,0x00,0x03,0x0a,0x5c]
+
+v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu8 v[2:5], s0, v1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x16_iu8 v[2:5], v0, s1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, s[0:3]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x16_iu8 v[2:5], 1, v1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x16_iu8 v[2:5], v0, 1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, 1
+// GFX1170: v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, 1 ; encoding: [0x02,0x40,0x44,0xcc,0x00,0x03,0x06,0x1a]
+
+v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, 1.0
+// GFX1170: v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, 1.0 ; encoding: [0x02,0x40,0x44,0xcc,0x00,0x03,0xca,0x1b]
+
+
+
+v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5]
+// GFX1170: v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x1c]
+
+v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] clamp
+// GFX1170: v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] clamp ; encoding: [0x02,0xc0,0x45,0xcc,0x00,0x03,0x0a,0x1c]
+
+v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0]
+// GFX1170: v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0] ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x3c]
+
+v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0]
+// GFX1170: v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0] ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x5c]
+
+v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x16_iu4 v[2:5], s0, v1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x16_iu4 v[2:5], v0, s1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, s[0:3]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x16_iu4 v[2:5], 1, v1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x16_iu4 v[2:5], v0, 1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, 1
+// GFX1170: v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, 1 ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0x06,0x1a]
+
+v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, 1.0
+// GFX1170: v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, 1.0 ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0xca,0x1b]
+
+
+
+v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5]
+// GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] ; encoding: [0x02,0x40,0x46,0xcc,0x00,0x03,0x0a,0x1c]
+
+v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1] ; encoding: [0x02,0x40,0x46,0xcc,0x00,0x03,0x0a,0x9c]
+
+v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1] ; encoding: [0x02,0x44,0x46,0xcc,0x00,0x03,0x0a,0x1c]
+
+v_wmma_f32_16x16x16_fp8_fp8 v[2:5], s0, v1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, s1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, s[0:3]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_fp8 v[2:5], 1.0, v1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, 1.0, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, 1.0
+// GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, 1.0 ; encoding: [0x02,0x40,0x46,0xcc,0x00,0x03,0xca,0x1b]
+
+v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, 1
+// GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, 1 ; encoding: [0x02,0x40,0x46,0xcc,0x00,0x03,0x06,0x1a]
+
+
+
+v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5]
+// GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] ; encoding: [0x02,0x40,0x47,0xcc,0x00,0x03,0x0a,0x1c]
+
+v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1] ; encoding: [0x02,0x40,0x47,0xcc,0x00,0x03,0x0a,0x9c]
+
+v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1] ; encoding: [0x02,0x44,0x47,0xcc,0x00,0x03,0x0a,0x1c]
+
+v_wmma_f32_16x16x16_fp8_bf8 v[2:5], s0, v1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, s1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, s[0:3]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_bf8 v[2:5], 1.0, v1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, 1.0, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, 1.0
+// GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, 1.0 ; encoding: [0x02,0x40,0x47,0xcc,0x00,0x03,0xca,0x1b]
+
+v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, 1
+// GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, 1 ; encoding: [0x02,0x40,0x47,0xcc,0x00,0x03,0x06,0x1a]
+
+
+
+v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5]
+// GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] ; encoding: [0x02,0x40,0x48,0xcc,0x00,0x03,0x0a,0x1c]
+
+v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1] ; encoding: [0x02,0x40,0x48,0xcc,0x00,0x03,0x0a,0x9c]
+
+v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1] ; encoding: [0x02,0x44,0x48,0xcc,0x00,0x03,0x0a,0x1c]
+
+v_wmma_f32_16x16x16_bf8_fp8 v[2:5], s0, v1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, s1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, s[0:3]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_fp8 v[2:5], 1.0, v1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, 1.0, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, 1.0
+// GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, 1.0 ; encoding: [0x02,0x40,0x48,0xcc,0x00,0x03,0xca,0x1b]
+
+v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, 1
+// GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, 1 ; encoding: [0x02,0x40,0x48,0xcc,0x00,0x03,0x06,0x1a]
+
+
+
+v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5]
+// GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] ; encoding: [0x02,0x40,0x49,0xcc,0x00,0x03,0x0a,0x1c]
+
+v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1] ; encoding: [0x02,0x40,0x49,0xcc,0x00,0x03,0x0a,0x9c]
+
+v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
+// GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1] ; encoding: [0x02,0x44,0x49,0xcc,0x00,0x03,0x0a,0x1c]
+
+v_wmma_f32_16x16x16_bf8_bf8 v[2:5], s0, v1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, s1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, s[0:3]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_bf8 v[2:5], 1.0, v1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, 1.0, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, 1.0
+// GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, 1.0 ; encoding: [0x02,0x40,0x49,0xcc,0x00,0x03,0xca,0x1b]
+
+v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, 1
+// GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, 1 ; encoding: [0x02,0x40,0x49,0xcc,0x00,0x03,0x06,0x1a]
+
+
+
+v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5]
+// GFX1170: v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] ; encoding: [0x02,0x40,0x4a,0xcc,0x00,0x03,0x0a,0x1c]
+
+v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] clamp
+// GFX1170: v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] clamp ; encoding: [0x02,0xc0,0x4a,0xcc,0x00,0x03,0x0a,0x1c]
+
+v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0]
+// GFX1170: v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0] ; encoding: [0x02,0x40,0x4a,0xcc,0x00,0x03,0x0a,0x3c]
+
+v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0]
+// GFX1170: v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0] ; encoding: [0x02,0x40,0x4a,0xcc,0x00,0x03,0x0a,0x5c]
+
+v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_wmma_i32_16x16x32_iu4 v[2:5], s0, v1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x32_iu4 v[2:5], v0, s1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, s[0:3]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x32_iu4 v[2:5], 1.0, v1, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x32_iu4 v[2:5], v0, 1.0, v[2:5]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, 1.0
+// GFX1170: v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, 1.0 ; encoding: [0x02,0x40,0x4a,0xcc,0x00,0x03,0xca,0x1b]
+
+v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, 1
+// GFX1170: v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, 1 ; encoding: [0x02,0x40,0x4a,0xcc,0x00,0x03,0x06,0x1a]
+
+
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10
+// GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 ; encoding: [0x06,0x40,0x50,0xcc,0x00,0x05,0x2a,0x1c]
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 index_key:1
+// GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 index_key:1 ; encoding: [0x06,0x48,0x50,0xcc,0x00,0x05,0x2a,0x1c]
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 index_key:2
+// GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 index_key:2 ; encoding: [0x06,0x50,0x50,0xcc,0x00,0x05,0x2a,0x1c]
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 index_key:3
+// GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 index_key:3 ; encoding: [0x06,0x58,0x50,0xcc,0x00,0x05,0x2a,0x1c]
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[1,0,0]
+// GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[1,0,0] ; encoding: [0x06,0x40,0x50,0xcc,0x00,0x05,0x2a,0x3c]
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[0,1,0]
+// GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[0,1,0] ; encoding: [0x06,0x40,0x50,0xcc,0x00,0x05,0x2a,0x5c]
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_hi:[1,0,0]
+// GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_hi:[1,0,0] ; encoding: [0x06,0x41,0x50,0xcc,0x00,0x05,0x2a,0x1c]
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_hi:[0,1,0]
+// GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_hi:[0,1,0] ; encoding: [0x06,0x42,0x50,0xcc,0x00,0x05,0x2a,0x1c]
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_swmmac_f32_16x16x32_f16 v[6:9], s[0:1], v[2:5], v10
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], s[0:3], v10
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], s10
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_f16 v[6:9], 1.0, v[2:5], v10
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], 1.0, v10
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10
+// GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 ; encoding: [0x06,0x40,0x51,0xcc,0x00,0x05,0x2a,0x1c]
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 index_key:1
+// GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 index_key:1 ; encoding: [0x06,0x48,0x51,0xcc,0x00,0x05,0x2a,0x1c]
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 index_key:2
+// GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 index_key:2 ; encoding: [0x06,0x50,0x51,0xcc,0x00,0x05,0x2a,0x1c]
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 index_key:3
+// GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 index_key:3 ; encoding: [0x06,0x58,0x51,0xcc,0x00,0x05,0x2a,0x1c]
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[1,0,0]
+// GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[1,0,0] ; encoding: [0x06,0x40,0x51,0xcc,0x00,0x05,0x2a,0x3c]
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[0,1,0]
+// GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[0,1,0] ; encoding: [0x06,0x40,0x51,0xcc,0x00,0x05,0x2a,0x5c]
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 neg_hi:[1,0,0]
+// GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 neg_hi:[1,0,0] ; encoding: [0x06,0x41,0x51,0xcc,0x00,0x05,0x2a,0x1c]
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 neg_hi:[0,1,0]
+// GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 neg_hi:[0,1,0] ; encoding: [0x06,0x42,0x51,0xcc,0x00,0x05,0x2a,0x1c]
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], s[0:1], v[2:5], v10
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], s[0:3], v10
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], s10
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], 1.0, v[2:5], v10
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], 1.0, v10
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8
+// GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 ; encoding: [0x06,0x40,0x52,0xcc,0x00,0x05,0x22,0x1c]
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 index_key:1
+// GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 index_key:1 ; encoding: [0x06,0x48,0x52,0xcc,0x00,0x05,0x22,0x1c]
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 index_key:2
+// GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 index_key:2 ; encoding: [0x06,0x50,0x52,0xcc,0x00,0x05,0x22,0x1c]
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 index_key:3
+// GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 index_key:3 ; encoding: [0x06,0x58,0x52,0xcc,0x00,0x05,0x22,0x1c]
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[1,0,0]
+// GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[1,0,0] ; encoding: [0x06,0x40,0x52,0xcc,0x00,0x05,0x22,0x3c]
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[0,1,0]
+// GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[0,1,0] ; encoding: [0x06,0x40,0x52,0xcc,0x00,0x05,0x22,0x5c]
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_hi:[1,0,0]
+// GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_hi:[1,0,0] ; encoding: [0x06,0x41,0x52,0xcc,0x00,0x05,0x22,0x1c]
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_hi:[0,1,0]
+// GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_hi:[0,1,0] ; encoding: [0x06,0x42,0x52,0xcc,0x00,0x05,0x22,0x1c]
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_swmmac_f16_16x16x32_f16 v[6:7], s[0:1], v[2:5], v8
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], s[0:3], v8
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], s8
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x32_f16 v[6:7], 1.0, v[2:5], v8
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], 1.0, v8
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8
+// GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 ; encoding: [0x06,0x40,0x53,0xcc,0x00,0x05,0x22,0x1c]
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 index_key:1
+// GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 index_key:1 ; encoding: [0x06,0x48,0x53,0xcc,0x00,0x05,0x22,0x1c]
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 index_key:2
+// GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 index_key:2 ; encoding: [0x06,0x50,0x53,0xcc,0x00,0x05,0x22,0x1c]
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 index_key:3
+// GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 index_key:3 ; encoding: [0x06,0x58,0x53,0xcc,0x00,0x05,0x22,0x1c]
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[1,0,0]
+// GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[1,0,0] ; encoding: [0x06,0x40,0x53,0xcc,0x00,0x05,0x22,0x3c]
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[0,1,0]
+// GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[0,1,0] ; encoding: [0x06,0x40,0x53,0xcc,0x00,0x05,0x22,0x5c]
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 neg_hi:[1,0,0]
+// GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 neg_hi:[1,0,0] ; encoding: [0x06,0x41,0x53,0xcc,0x00,0x05,0x22,0x1c]
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 neg_hi:[0,1,0]
+// GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 neg_hi:[0,1,0] ; encoding: [0x06,0x42,0x53,0xcc,0x00,0x05,0x22,0x1c]
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], s[0:1], v[2:5], v8
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], s[0:3], v8
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], s8
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], 1.0, v[2:5], v8
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], 1.0, v8
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7
+// GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 ; encoding: [0x03,0x40,0x54,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 clamp
+// GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 clamp ; encoding: [0x03,0xc0,0x54,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 index_key:1
+// GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 index_key:1 ; encoding: [0x03,0x48,0x54,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 index_key:2
+// GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 index_key:2 ; encoding: [0x03,0x50,0x54,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 index_key:3
+// GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 index_key:3 ; encoding: [0x03,0x58,0x54,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 neg_lo:[1,0,0]
+// GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 neg_lo:[1,0,0] ; encoding: [0x03,0x40,0x54,0xcc,0x00,0x03,0x1e,0x3c]
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 neg_lo:[0,1,0]
+// GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 neg_lo:[0,1,0] ; encoding: [0x03,0x40,0x54,0xcc,0x00,0x03,0x1e,0x5c]
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], s0, v[1:2], v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, s[0:1], v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], s7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], 1, v[1:2], v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, 1, v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6
+// GFX1170: v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 ; encoding: [0x02,0x40,0x55,0xcc,0x00,0x03,0x1a,0x1c]
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 clamp
+// GFX1170: v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 clamp ; encoding: [0x02,0xc0,0x55,0xcc,0x00,0x03,0x1a,0x1c]
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 index_key:1
+// GFX1170: v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 index_key:1 ; encoding: [0x02,0x48,0x55,0xcc,0x00,0x03,0x1a,0x1c]
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 index_key:2
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 index_key:3
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 neg_lo:[1,0,0]
+// GFX1170: v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 neg_lo:[1,0,0] ; encoding: [0x02,0x40,0x55,0xcc,0x00,0x03,0x1a,0x3c]
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 neg_lo:[0,1,0]
+// GFX1170: v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 neg_lo:[0,1,0] ; encoding: [0x02,0x40,0x55,0xcc,0x00,0x03,0x1a,0x5c]
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], s0, v1, v6
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, s1, v6
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, s6
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], 1, v1, v6
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, 1, v6
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7
+// GFX1170: v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 ; encoding: [0x03,0x40,0x56,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 clamp
+// GFX1170: v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 clamp ; encoding: [0x03,0xc0,0x56,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 index_key:1
+// GFX1170: v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 index_key:1 ; encoding: [0x03,0x48,0x56,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 index_key:2
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 index_key:3
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: out of range index_key
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 neg_lo:[1,0,0]
+// GFX1170: v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 neg_lo:[1,0,0] ; encoding: [0x03,0x40,0x56,0xcc,0x00,0x03,0x1e,0x3c]
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 neg_lo:[0,1,0]
+// GFX1170: v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 neg_lo:[0,1,0] ; encoding: [0x03,0x40,0x56,0xcc,0x00,0x03,0x1e,0x5c]
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], s0, v[1:2], v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, s[0:1], v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], s7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], 1, v[1:2], v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, 1, v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7
+// GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 ; encoding: [0x03,0x40,0x57,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 index_key:1
+// GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 index_key:1 ; encoding: [0x03,0x48,0x57,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 index_key:2
+// GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 index_key:2 ; encoding: [0x03,0x50,0x57,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 index_key:3
+// GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 index_key:3 ; encoding: [0x03,0x58,0x57,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 neg_lo:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 neg_lo:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], s0, v[1:2], v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, s[0:1], v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], s7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], 1.0, v[1:2], v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, 1.0, v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7
+// GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 ; encoding: [0x03,0x40,0x58,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 index_key:1
+// GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 index_key:1 ; encoding: [0x03,0x48,0x58,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 index_key:2
+// GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 index_key:2 ; encoding: [0x03,0x50,0x58,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 index_key:3
+// GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 index_key:3 ; encoding: [0x03,0x58,0x58,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 neg_lo:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 neg_lo:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], s0, v[1:2], v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, s[0:1], v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], s7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], 1.0, v[1:2], v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, 1.0, v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7
+// GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 ; encoding: [0x03,0x40,0x59,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 index_key:1
+// GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 index_key:1 ; encoding: [0x03,0x48,0x59,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 index_key:2
+// GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 index_key:2 ; encoding: [0x03,0x50,0x59,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 index_key:3
+// GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 index_key:3 ; encoding: [0x03,0x58,0x59,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 neg_lo:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 neg_lo:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], s0, v[1:2], v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, s[0:1], v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], s7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], 1.0, v[1:2], v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, 1.0, v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7
+// GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 ; encoding: [0x03,0x40,0x5a,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 clamp
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 op_sel:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 op_sel:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 op_sel:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 op_sel_hi:[0,1,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 op_sel_hi:[1,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 op_sel_hi:[1,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 index_key:1
+// GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 index_key:1 ; encoding: [0x03,0x48,0x5a,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 index_key:2
+// GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 index_key:2 ; encoding: [0x03,0x50,0x5a,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 index_key:3
+// GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 index_key:3 ; encoding: [0x03,0x58,0x5a,0xcc,0x00,0x03,0x1e,0x1c]
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 neg_lo:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 neg_lo:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 neg_lo:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 neg_hi:[1,0,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 neg_hi:[0,1,0]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 neg_hi:[0,0,1]
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], s0, v[1:2], v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, s[0:1], v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], s7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], 1.0, v[1:2], v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, 1.0, v7
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], 1.0
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], 1
+// GFX1170-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s
index 363db1a16b170..a96e9c4c07873 100644
--- a/llvm/test/MC/AMDGPU/literals.s
+++ b/llvm/test/MC/AMDGPU/literals.s
@@ -206,14 +206,14 @@ v_fract_f64_e32 v[0:1], lit(1.0)
 v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], 1.0
 // GFX11: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], 1.0 ; encoding: [0x08,0x40,0x44,0xcc,0x00,0x09,0xca,0x1b]
 // NOGFX12: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-// NOGFX1250: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU
 // NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU
 // NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
 
 v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], lit(1.0)
 // NOGFX11: :[[@LINE-1]]:54: error: invalid operand for instruction
 // NOGFX12: :[[@LINE-2]]:54: error: invalid operand for instruction
-// NOGFX1250: :[[@LINE-3]]:54: error: invalid operand for instruction
+// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU
 // NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU
 // NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
 
@@ -658,14 +658,14 @@ v_fract_f64_e32 v[0:1], 0xffffffffffffffff
 v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], 1
 // GFX11: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], 1 ; encoding: [0x08,0x40,0x44,0xcc,0x00,0x09,0x06,0x1a]
 // NOGFX12: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
-// NOGFX1250: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU
 // NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU
 // NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
 
 v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], lit(1)
 // NOGFX11: :[[@LINE-1]]:54: error: invalid operand for instruction
 // NOGFX12: :[[@LINE-2]]:54: error: invalid operand for instruction
-// NOGFX1250: :[[@LINE-3]]:54: error: invalid operand for instruction
+// NOGFX1250: :[[@LINE-3]]:1: error: instruction not supported on this GPU
 // NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU
 // NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_wmma_w32.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_wmma_w32.txt
new file mode 100644
index 0000000000000..1e778fb04aea2
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_wmma_w32.txt
@@ -0,0 +1,1628 @@
+# RUN: not llvm-mc -disassemble -triple=amdgcn -mcpu=gfx1170 -show-encoding %s | FileCheck --check-prefix=GFX1170 %s
+# RUN: not llvm-mc -disassemble -triple=amdgcn -mcpu=gfx1170 -show-encoding %s 2>&1 | FileCheck --implicit-check-not=warning: --check-prefix=GFX1170-ERR %s
+
+[0x08,0x40,0x40,0xcc,0x00,0x09,0x22,0x1c]
+# GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] ; encoding: [0x08,0x40,0x40,0xcc,0x00,0x09,0x22,0x1c]
+
+[0x08,0xc0,0x40,0xcc,0x00,0x09,0x22,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x48,0x40,0xcc,0x00,0x09,0x22,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x50,0x40,0xcc,0x00,0x09,0x22,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x60,0x40,0xcc,0x00,0x09,0x22,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x40,0x40,0xcc,0x00,0x09,0x22,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x40,0x40,0xcc,0x00,0x09,0x22,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x00,0x40,0xcc,0x00,0x09,0x22,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x48,0x40,0xcc,0x00,0x09,0x22,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x50,0x40,0xcc,0x00,0x09,0x22,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x58,0x40,0xcc,0x00,0x09,0x22,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x40,0x40,0xcc,0x00,0x09,0x22,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,0,0] ; encoding: [0x08,0x40,0x40,0xcc,0x00,0x09,0x22,0x3c]
+
+[0x08,0x40,0x40,0xcc,0x00,0x09,0x22,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,1,0] ; encoding: [0x08,0x40,0x40,0xcc,0x00,0x09,0x22,0x5c]
+
+[0x08,0x40,0x40,0xcc,0x00,0x09,0x22,0x9c] # neg_lo:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1] ; encoding: [0x08,0x40,0x40,0xcc,0x00,0x09,0x22,0x9c]
+
+[0x08,0x41,0x40,0xcc,0x00,0x09,0x22,0x1c] # neg_hi:[1,0,0]
+# GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[1,0,0] ; encoding: [0x08,0x41,0x40,0xcc,0x00,0x09,0x22,0x1c]
+
+[0x08,0x42,0x40,0xcc,0x00,0x09,0x22,0x1c] # neg_hi:[0,1,0]
+# GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[0,1,0] ; encoding: [0x08,0x42,0x40,0xcc,0x00,0x09,0x22,0x1c]
+
+[0x08,0x44,0x40,0xcc,0x00,0x09,0x22,0x1c] # neg_hi:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[0,0,1] ; encoding: [0x08,0x44,0x40,0xcc,0x00,0x09,0x22,0x1c]
+
+[0x08,0x40,0x40,0xcc,0x00,0x08,0x22,0x1c] # sgpr src0
+# GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], s[0:3]/*Invalid register, operand has 'VReg_128' register class*/, v[4:7], v[8:15] ; encoding: [0x08,0x40,0x40,0xcc,0x00,0x08,0x22,0x1c]
+
+[0x08,0x40,0x40,0xcc,0x00,0x09,0x20,0x1c] # sgpr src1
+# GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], s[4:7]/*Invalid register, operand has 'VReg_128' register class*/, v[8:15] ; encoding: [0x08,0x40,0x40,0xcc,0x00,0x09,0x20,0x1c]
+
+[0x08,0x40,0x40,0xcc,0x00,0x09,0x22,0x18] # sgpr src2
+# GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], s[8:15]/*Invalid register, operand has 'VReg_256' register class*/ ; encoding: [0x08,0x40,0x40,0xcc,0x00,0x09,0x22,0x18]
+
+[0x08,0x40,0x40,0xcc,0xf2,0x08,0x22,0x1c] # 1.0 src0
+# GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], 1.0/*Invalid immediate*/, v[4:7], v[8:15] ; encoding: [0x08,0x40,0x40,0xcc,0x00,0x08,0x22,0x1c]
+
+[0x08,0x40,0x40,0xcc,0x00,0xe5,0x21,0x1c] # 1.0 src1
+# GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], 1.0/*Invalid immediate*/, v[8:15] ; encoding: [0x08,0x40,0x40,0xcc,0x00,0x01,0x20,0x1c]
+
+[0x08,0x40,0x40,0xcc,0x00,0x09,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], 1.0 ; encoding: [0x08,0x40,0x40,0xcc,0x00,0x09,0xca,0x1b]
+
+[0x08,0x40,0x40,0xcc,0x00,0x09,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], 1 ; encoding: [0x08,0x40,0x40,0xcc,0x00,0x09,0x06,0x1a]
+
+
+
+[0x08,0x40,0x41,0xcc,0x00,0x09,0x22,0x1c]
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] ; encoding: [0x08,0x40,0x41,0xcc,0x00,0x09,0x22,0x1c]
+
+[0x08,0xc0,0x41,0xcc,0x00,0x09,0x22,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x48,0x41,0xcc,0x00,0x09,0x22,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x50,0x41,0xcc,0x00,0x09,0x22,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x60,0x41,0xcc,0x00,0x09,0x22,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x40,0x41,0xcc,0x00,0x09,0x22,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x40,0x41,0xcc,0x00,0x09,0x22,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x00,0x41,0xcc,0x00,0x09,0x22,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x48,0x41,0xcc,0x00,0x05,0x12,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x41,0xcc,0x00,0x05,0x12,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x58,0x41,0xcc,0x00,0x05,0x12,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x40,0x41,0xcc,0x00,0x09,0x22,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,0,0] ; encoding: [0x08,0x40,0x41,0xcc,0x00,0x09,0x22,0x3c]
+
+[0x08,0x40,0x41,0xcc,0x00,0x09,0x22,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,1,0] ; encoding: [0x08,0x40,0x41,0xcc,0x00,0x09,0x22,0x5c]
+
+[0x08,0x40,0x41,0xcc,0x00,0x09,0x22,0x9c] # neg_lo:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,0,1] ; encoding: [0x08,0x40,0x41,0xcc,0x00,0x09,0x22,0x9c]
+
+[0x08,0x41,0x41,0xcc,0x00,0x09,0x22,0x1c] # neg_hi:[1,0,0]
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[1,0,0] ; encoding: [0x08,0x41,0x41,0xcc,0x00,0x09,0x22,0x1c]
+
+[0x08,0x42,0x41,0xcc,0x00,0x09,0x22,0x1c] # neg_hi:[0,1,0]
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[0,1,0] ; encoding: [0x08,0x42,0x41,0xcc,0x00,0x09,0x22,0x1c]
+
+[0x08,0x44,0x41,0xcc,0x00,0x09,0x22,0x1c] # neg_hi:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15] neg_hi:[0,0,1] ; encoding: [0x08,0x44,0x41,0xcc,0x00,0x09,0x22,0x1c]
+
+[0x08,0x40,0x41,0xcc,0x00,0x08,0x22,0x1c] # sgpr src0
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], s[0:3]/*Invalid register, operand has 'VReg_128' register class*/, v[4:7], v[8:15] ; encoding: [0x08,0x40,0x41,0xcc,0x00,0x08,0x22,0x1c]
+
+[0x08,0x40,0x41,0xcc,0x00,0x09,0x20,0x1c] # sgpr src1
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], s[4:7]/*Invalid register, operand has 'VReg_128' register class*/, v[8:15] ; encoding: [0x08,0x40,0x41,0xcc,0x00,0x09,0x20,0x1c]
+
+[0x08,0x40,0x41,0xcc,0x00,0x09,0x22,0x18] # sgpr src2
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], s[8:15]/*Invalid register, operand has 'VReg_256' register class*/ ; encoding: [0x08,0x40,0x41,0xcc,0x00,0x09,0x22,0x18]
+
+[0x08,0x40,0x41,0xcc,0xf2,0x08,0x22,0x1c] # 1.0 src0
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], 1.0/*Invalid immediate*/, v[4:7], v[8:15] ; encoding: [0x08,0x40,0x41,0xcc,0x00,0x08,0x22,0x1c]
+
+[0x08,0x40,0x41,0xcc,0x00,0xe5,0x21,0x1c] # 1.0 src1
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], 1.0/*Invalid immediate*/, v[8:15] ; encoding: [0x08,0x40,0x41,0xcc,0x00,0x01,0x20,0x1c]
+
+[0x08,0x40,0x41,0xcc,0x00,0x09,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], 1.0 ; encoding: [0x08,0x40,0x41,0xcc,0x00,0x09,0xca,0x1b]
+
+[0x08,0x40,0x41,0xcc,0x00,0x09,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], 1 ; encoding: [0x08,0x40,0x41,0xcc,0x00,0x09,0x06,0x1a]
+
+
+
+[0x08,0x40,0x42,0xcc,0x00,0x09,0x22,0x1c]
+# GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] ; encoding: [0x08,0x40,0x42,0xcc,0x00,0x09,0x22,0x1c]
+
+[0x08,0xc0,0x42,0xcc,0x00,0x09,0x22,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x48,0x42,0xcc,0x00,0x09,0x22,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x50,0x42,0xcc,0x00,0x09,0x22,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x60,0x42,0xcc,0x00,0x09,0x22,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x40,0x42,0xcc,0x00,0x09,0x22,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x40,0x42,0xcc,0x00,0x09,0x22,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x00,0x42,0xcc,0x00,0x09,0x22,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x48,0x42,0xcc,0x00,0x05,0x12,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x42,0xcc,0x00,0x05,0x12,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x58,0x42,0xcc,0x00,0x05,0x12,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x40,0x42,0xcc,0x00,0x09,0x22,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,0,0] ; encoding: [0x08,0x40,0x42,0xcc,0x00,0x09,0x22,0x3c]
+
+[0x08,0x40,0x42,0xcc,0x00,0x09,0x22,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,1,0] ; encoding: [0x08,0x40,0x42,0xcc,0x00,0x09,0x22,0x5c]
+
+[0x08,0x40,0x42,0xcc,0x00,0x09,0x22,0x9c] # neg_lo:[0,0,1]
+# GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,0,1] ; encoding: [0x08,0x40,0x42,0xcc,0x00,0x09,0x22,0x9c]
+
+[0x08,0x41,0x42,0xcc,0x00,0x09,0x22,0x1c] # neg_hi:[1,0,0]
+# GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[1,0,0] ; encoding: [0x08,0x41,0x42,0xcc,0x00,0x09,0x22,0x1c]
+
+[0x08,0x42,0x42,0xcc,0x00,0x09,0x22,0x1c] # neg_hi:[0,1,0]
+# GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[0,1,0] ; encoding: [0x08,0x42,0x42,0xcc,0x00,0x09,0x22,0x1c]
+
+[0x08,0x44,0x42,0xcc,0x00,0x09,0x22,0x1c] # neg_hi:[0,0,1]
+# GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[0,0,1] ; encoding: [0x08,0x44,0x42,0xcc,0x00,0x09,0x22,0x1c]
+
+[0x08,0x40,0x42,0xcc,0x00,0x08,0x22,0x1c] # sgpr src0
+# GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], s[0:3]/*Invalid register, operand has 'VReg_128' register class*/, v[4:7], v[8:11] ; encoding: [0x08,0x40,0x42,0xcc,0x00,0x08,0x22,0x1c]
+
+[0x08,0x40,0x42,0xcc,0x00,0x09,0x20,0x1c] # sgpr src1
+# GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], s[4:7]/*Invalid register, operand has 'VReg_128' register class*/, v[8:11] ; encoding: [0x08,0x40,0x42,0xcc,0x00,0x09,0x20,0x1c]
+
+[0x08,0x40,0x42,0xcc,0x00,0x09,0x22,0x18] # sgpr src2
+# GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], s[8:11]/*Invalid register, operand has 'VReg_128' register class*/ ; encoding: [0x08,0x40,0x42,0xcc,0x00,0x09,0x22,0x18]
+
+[0x08,0x40,0x42,0xcc,0xf2,0x08,0x22,0x1c] # 1.0 src0
+# GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], 1.0/*Invalid immediate*/, v[4:7], v[8:11] ; encoding: [0x08,0x40,0x42,0xcc,0x00,0x08,0x22,0x1c]
+
+[0x08,0x40,0x42,0xcc,0x00,0xe5,0x21,0x1c] # 1.0 src1
+# GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], 1.0/*Invalid immediate*/, v[8:11] ; encoding: [0x08,0x40,0x42,0xcc,0x00,0x01,0x20,0x1c]
+
+[0x08,0x40,0x42,0xcc,0x00,0x09,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], 1.0 ; encoding: [0x08,0x40,0x42,0xcc,0x00,0x09,0xca,0x1b]
+
+[0x08,0x40,0x42,0xcc,0x00,0x09,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], 1 ; encoding: [0x08,0x40,0x42,0xcc,0x00,0x09,0x06,0x1a]
+
+
+
+[0x08,0x40,0x43,0xcc,0x00,0x09,0x22,0x1c]
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] ; encoding: [0x08,0x40,0x43,0xcc,0x00,0x09,0x22,0x1c]
+
+[0x08,0xc0,0x43,0xcc,0x00,0x09,0x22,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x48,0x43,0xcc,0x00,0x09,0x22,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x50,0x43,0xcc,0x00,0x09,0x22,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x60,0x43,0xcc,0x00,0x09,0x22,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x40,0x43,0xcc,0x00,0x09,0x22,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x40,0x43,0xcc,0x00,0x09,0x22,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x00,0x43,0xcc,0x00,0x09,0x22,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x48,0x43,0xcc,0x00,0x05,0x12,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x43,0xcc,0x00,0x05,0x12,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x58,0x43,0xcc,0x00,0x05,0x12,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x08,0x40,0x43,0xcc,0x00,0x09,0x22,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,0,0] ; encoding: [0x08,0x40,0x43,0xcc,0x00,0x09,0x22,0x3c]
+
+[0x08,0x40,0x43,0xcc,0x00,0x09,0x22,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,1,0] ; encoding: [0x08,0x40,0x43,0xcc,0x00,0x09,0x22,0x5c]
+
+[0x08,0x40,0x43,0xcc,0x00,0x09,0x22,0x9c] # neg_lo:[0,0,1]
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,0,1] ; encoding: [0x08,0x40,0x43,0xcc,0x00,0x09,0x22,0x9c]
+
+[0x08,0x41,0x43,0xcc,0x00,0x09,0x22,0x1c] # neg_hi:[1,0,0]
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[1,0,0] ; encoding: [0x08,0x41,0x43,0xcc,0x00,0x09,0x22,0x1c]
+
+[0x08,0x42,0x43,0xcc,0x00,0x09,0x22,0x1c] # neg_hi:[0,1,0]
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[0,1,0] ; encoding: [0x08,0x42,0x43,0xcc,0x00,0x09,0x22,0x1c]
+
+[0x08,0x44,0x43,0xcc,0x00,0x09,0x22,0x1c] # neg_hi:[0,0,1]
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11] neg_hi:[0,0,1] ; encoding: [0x08,0x44,0x43,0xcc,0x00,0x09,0x22,0x1c]
+
+[0x08,0x40,0x43,0xcc,0x00,0x08,0x22,0x1c] # sgpr src0
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], s[0:3]/*Invalid register, operand has 'VReg_128' register class*/, v[4:7], v[8:11] ; encoding: [0x08,0x40,0x43,0xcc,0x00,0x08,0x22,0x1c]
+
+[0x08,0x40,0x43,0xcc,0x00,0x09,0x20,0x1c] # sgpr src1
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], s[4:7]/*Invalid register, operand has 'VReg_128' register class*/, v[8:11] ; encoding: [0x08,0x40,0x43,0xcc,0x00,0x09,0x20,0x1c]
+
+[0x08,0x40,0x43,0xcc,0x00,0x09,0x22,0x18] # sgpr src2
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], s[8:11]/*Invalid register, operand has 'VReg_128' register class*/ ; encoding: [0x08,0x40,0x43,0xcc,0x00,0x09,0x22,0x18]
+
+[0x08,0x40,0x43,0xcc,0xf2,0x08,0x22,0x1c] # 1.0 src0
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], 1.0/*Invalid immediate*/, v[4:7], v[8:11] ; encoding: [0x08,0x40,0x43,0xcc,0x00,0x08,0x22,0x1c]
+
+[0x08,0x40,0x43,0xcc,0x00,0xe5,0x21,0x1c] # 1.0 src1
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], 1.0/*Invalid immediate*/, v[8:11] ; encoding: [0x08,0x40,0x43,0xcc,0x00,0x01,0x20,0x1c]
+
+[0x08,0x40,0x43,0xcc,0x00,0x09,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], 1.0 ; encoding: [0x08,0x40,0x43,0xcc,0x00,0x09,0xca,0x1b]
+
+[0x08,0x40,0x43,0xcc,0x00,0x09,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], 1 ; encoding: [0x08,0x40,0x43,0xcc,0x00,0x09,0x06,0x1a]
+
+
+
+[0x04,0x40,0x44,0xcc,0x00,0x05,0x12,0x1c]
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] ; encoding: [0x04,0x40,0x44,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0xc0,0x44,0xcc,0x00,0x05,0x12,0x1c] # clamp
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] clamp ; encoding: [0x04,0xc0,0x44,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0x48,0x44,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x44,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x60,0x44,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x44,0xcc,0x00,0x05,0x12,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x44,0xcc,0x00,0x05,0x12,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x00,0x44,0xcc,0x00,0x05,0x12,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x48,0x44,0xcc,0x00,0x05,0x12,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x44,0xcc,0x00,0x05,0x12,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x58,0x44,0xcc,0x00,0x05,0x12,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x44,0xcc,0x00,0x05,0x12,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0] ; encoding: [0x04,0x40,0x44,0xcc,0x00,0x05,0x12,0x3c]
+
+[0x04,0x40,0x44,0xcc,0x00,0x05,0x12,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0] ; encoding: [0x04,0x40,0x44,0xcc,0x00,0x05,0x12,0x5c]
+
+[0x04,0x40,0x44,0xcc,0x00,0x05,0x12,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x41,0x44,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x42,0x44,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x44,0x44,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x44,0xcc,0x00,0x04,0x12,0x1c] # sgpr src0
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[4:11], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[2:3], v[4:11] ; encoding: [0x04,0x40,0x44,0xcc,0x00,0x04,0x12,0x1c]
+
+[0x04,0x40,0x44,0xcc,0x00,0x05,0x10,0x1c] # sgpr src1
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], s[2:3]/*Invalid register, operand has 'VReg_64' register class*/, v[4:11] ; encoding: [0x04,0x40,0x44,0xcc,0x00,0x05,0x10,0x1c]
+
+[0x04,0x40,0x44,0xcc,0x00,0x05,0x12,0x18] # sgpr src2
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], s[4:11]/*Invalid register, operand has 'VReg_256' register class*/ ; encoding: [0x04,0x40,0x44,0xcc,0x00,0x05,0x12,0x18]
+
+[0x04,0x40,0x44,0xcc,0x81,0x04,0x12,0x1c] # 1 src0
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[4:11], 1/*Invalid immediate*/, v[2:3], v[4:11] ; encoding: [0x04,0x40,0x44,0xcc,0x01,0x04,0x12,0x1c]
+
+[0x04,0x40,0x44,0xcc,0x00,0x03,0x11,0x1c] # 1 src1
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], 1/*Invalid immediate*/, v[4:11] ; encoding: [0x04,0x40,0x44,0xcc,0x00,0x03,0x10,0x1c]
+
+[0x04,0x40,0x44,0xcc,0x00,0x05,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], 1 ; encoding: [0x04,0x40,0x44,0xcc,0x00,0x05,0x06,0x1a]
+
+[0x04,0x40,0x44,0xcc,0x00,0x05,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x40,0x44,0xcc,0x00,0x05,0xca,0x1b]
+
+
+
+[0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x1c]
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x1c]
+
+[0x02,0xc0,0x45,0xcc,0x00,0x03,0x0a,0x1c] # clamp
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] clamp ; encoding: [0x02,0xc0,0x45,0xcc,0x00,0x03,0x0a,0x1c]
+
+[0x02,0x48,0x45,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x50,0x45,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x60,0x45,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x00,0x45,0xcc,0x00,0x03,0x0a,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x48,0x45,0xcc,0x00,0x03,0x0a,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x50,0x45,0xcc,0x00,0x03,0x0a,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x58,0x45,0xcc,0x00,0x03,0x0a,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] neg_lo:[1,0,0] ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x3c]
+
+[0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9] neg_lo:[0,1,0] ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x5c]
+
+[0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x41,0x45,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x42,0x45,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x44,0x45,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x45,0xcc,0x00,0x02,0x0a,0x1c] # sgpr src0
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:9], s0/*Invalid register, operand has 'VGPR_32' register class*/, v1, v[2:9] ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x02,0x0a,0x1c]
+
+[0x02,0x40,0x45,0xcc,0x00,0x03,0x08,0x1c] # sgpr src1
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:9], v0, s1/*Invalid register, operand has 'VGPR_32' register class*/, v[2:9] ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0x08,0x1c]
+
+[0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x18] # sgpr src2
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, s[0:7]/*Invalid register, operand has 'VReg_256' register class*/ ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0x02,0x18]
+
+[0x02,0x40,0x45,0xcc,0x81,0x02,0x0a,0x1c] # 1 src0
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:9], 1/*Invalid immediate*/, v1, v[2:9] ; encoding: [0x02,0x40,0x45,0xcc,0x01,0x02,0x0a,0x1c]
+
+[0x02,0x40,0x45,0xcc,0x00,0x03,0x09,0x1c] # 1 src1
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:9], v0, 1/*Invalid immediate*/, v[2:9] ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0x08,0x1c]
+
+[0x02,0x40,0x45,0xcc,0x00,0x03,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, 1 ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0x06,0x1a]
+
+[0x02,0x40,0x45,0xcc,0x00,0x03,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, 1.0 ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0xca,0x1b]
+
+
+
+[0x04,0x40,0x46,0xcc,0x00,0x05,0x12,0x1c]
+# GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] ; encoding: [0x04,0x40,0x46,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0xc0,0x46,0xcc,0x00,0x05,0x12,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x48,0x46,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x46,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x60,0x46,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x46,0xcc,0x00,0x05,0x12,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x46,0xcc,0x00,0x05,0x12,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x00,0x46,0xcc,0x00,0x05,0x12,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x48,0x46,0xcc,0x00,0x05,0x12,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x46,0xcc,0x00,0x05,0x12,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x58,0x46,0xcc,0x00,0x05,0x12,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x46,0xcc,0x00,0x05,0x12,0x3c] # neg_lo:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x46,0xcc,0x00,0x05,0x12,0x5c] # neg_lo:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x46,0xcc,0x00,0x05,0x12,0x9c] # neg_lo:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x46,0xcc,0x00,0x05,0x12,0x9c]
+
+[0x04,0x41,0x46,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x42,0x46,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x44,0x46,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x46,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0x40,0x46,0xcc,0x00,0x04,0x12,0x1c] # sgpr src0
+# GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[4:11], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[2:3], v[4:11] ; encoding: [0x04,0x40,0x46,0xcc,0x00,0x04,0x12,0x1c]
+
+[0x04,0x40,0x46,0xcc,0x00,0x05,0x10,0x1c] # sgpr src1
+# GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], s[2:3]/*Invalid register, operand has 'VReg_64' register class*/, v[4:11] ; encoding: [0x04,0x40,0x46,0xcc,0x00,0x05,0x10,0x1c]
+
+[0x04,0x40,0x46,0xcc,0x00,0x05,0x12,0x18] # sgpr src2
+# GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], s[4:11]/*Invalid register, operand has 'VReg_256' register class*/ ; encoding: [0x04,0x40,0x46,0xcc,0x00,0x05,0x12,0x18]
+
+[0x04,0x40,0x46,0xcc,0xf2,0x04,0x12,0x1c] # 1.0 src0
+# GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[4:11], 1.0/*Invalid immediate*/, v[2:3], v[4:11] ; encoding: [0x04,0x40,0x46,0xcc,0x00,0x04,0x12,0x1c]
+
+[0x04,0x40,0x46,0xcc,0x00,0xe5,0x11,0x1c] # 1.0 src1
+# GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], 1.0/*Invalid immediate*/, v[4:11] ; encoding: [0x04,0x40,0x46,0xcc,0x00,0x01,0x10,0x1c]
+
+[0x04,0x40,0x46,0xcc,0x00,0x05,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x40,0x46,0xcc,0x00,0x05,0xca,0x1b]
+
+[0x04,0x40,0x46,0xcc,0x00,0x05,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], 1 ; encoding: [0x04,0x40,0x46,0xcc,0x00,0x05,0x06,0x1a]
+
+
+
+[0x04,0x40,0x48,0xcc,0x00,0x05,0x12,0x1c]
+# GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] ; encoding: [0x04,0x40,0x48,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0xc0,0x48,0xcc,0x00,0x05,0x12,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x48,0x48,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x48,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x60,0x48,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x48,0xcc,0x00,0x05,0x12,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x48,0xcc,0x00,0x05,0x12,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x00,0x48,0xcc,0x00,0x05,0x12,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x48,0x48,0xcc,0x00,0x05,0x12,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x48,0xcc,0x00,0x05,0x12,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x58,0x48,0xcc,0x00,0x05,0x12,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x48,0xcc,0x00,0x05,0x12,0x3c] # neg_lo:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x48,0xcc,0x00,0x05,0x12,0x5c] # neg_lo:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x48,0xcc,0x00,0x05,0x12,0x9c] # neg_lo:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x48,0xcc,0x00,0x05,0x12,0x9c]
+
+[0x04,0x41,0x48,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x42,0x48,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x44,0x48,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x48,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0x40,0x48,0xcc,0x00,0x04,0x12,0x1c] # sgpr src0
+# GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[4:11], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[2:3], v[4:11] ; encoding: [0x04,0x40,0x48,0xcc,0x00,0x04,0x12,0x1c]
+
+[0x04,0x40,0x48,0xcc,0x00,0x05,0x10,0x1c] # sgpr src1
+# GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], s[2:3]/*Invalid register, operand has 'VReg_64' register class*/, v[4:11] ; encoding: [0x04,0x40,0x48,0xcc,0x00,0x05,0x10,0x1c]
+
+[0x04,0x40,0x48,0xcc,0x00,0x05,0x12,0x18] # sgpr src2
+# GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], s[4:11]/*Invalid register, operand has 'VReg_256' register class*/ ; encoding: [0x04,0x40,0x48,0xcc,0x00,0x05,0x12,0x18]
+
+[0x04,0x40,0x48,0xcc,0xf2,0x04,0x12,0x1c] # 1.0 src0
+# GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[4:11], 1.0/*Invalid immediate*/, v[2:3], v[4:11] ; encoding: [0x04,0x40,0x48,0xcc,0x00,0x04,0x12,0x1c]
+
+[0x04,0x40,0x48,0xcc,0x00,0xe5,0x11,0x1c] # 1.0 src1
+# GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], 1.0/*Invalid immediate*/, v[4:11] ; encoding: [0x04,0x40,0x48,0xcc,0x00,0x01,0x10,0x1c]
+
+[0x04,0x40,0x48,0xcc,0x00,0x05,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x40,0x48,0xcc,0x00,0x05,0xca,0x1b]
+
+[0x04,0x40,0x48,0xcc,0x00,0x05,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], 1 ; encoding: [0x04,0x40,0x48,0xcc,0x00,0x05,0x06,0x1a]
+
+
+
+[0x04,0x40,0x47,0xcc,0x00,0x05,0x12,0x1c]
+# GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] ; encoding: [0x04,0x40,0x47,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0xc0,0x47,0xcc,0x00,0x05,0x12,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x48,0x47,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x47,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x60,0x47,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x47,0xcc,0x00,0x05,0x12,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x47,0xcc,0x00,0x05,0x12,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x00,0x47,0xcc,0x00,0x05,0x12,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x48,0x47,0xcc,0x00,0x05,0x12,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x47,0xcc,0x00,0x05,0x12,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x58,0x47,0xcc,0x00,0x05,0x12,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x47,0xcc,0x00,0x05,0x12,0x3c] # neg_lo:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x47,0xcc,0x00,0x05,0x12,0x5c] # neg_lo:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x47,0xcc,0x00,0x05,0x12,0x9c] # neg_lo:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x47,0xcc,0x00,0x05,0x12,0x9c]
+
+[0x04,0x41,0x47,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x42,0x47,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x44,0x47,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x47,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0x40,0x47,0xcc,0x00,0x04,0x12,0x1c] # sgpr src0
+# GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[4:11], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[2:3], v[4:11] ; encoding: [0x04,0x40,0x47,0xcc,0x00,0x04,0x12,0x1c]
+
+[0x04,0x40,0x47,0xcc,0x00,0x05,0x10,0x1c] # sgpr src1
+# GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], s[2:3]/*Invalid register, operand has 'VReg_64' register class*/, v[4:11] ; encoding: [0x04,0x40,0x47,0xcc,0x00,0x05,0x10,0x1c]
+
+[0x04,0x40,0x47,0xcc,0x00,0x05,0x12,0x18] # sgpr src2
+# GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], s[4:11]/*Invalid register, operand has 'VReg_256' register class*/ ; encoding: [0x04,0x40,0x47,0xcc,0x00,0x05,0x12,0x18]
+
+[0x04,0x40,0x47,0xcc,0xf2,0x04,0x12,0x1c] # 1.0 src0
+# GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[4:11], 1.0/*Invalid immediate*/, v[2:3], v[4:11] ; encoding: [0x04,0x40,0x47,0xcc,0x00,0x04,0x12,0x1c]
+
+[0x04,0x40,0x47,0xcc,0x00,0xe5,0x11,0x1c] # 1.0 src1
+# GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], 1.0/*Invalid immediate*/, v[4:11] ; encoding: [0x04,0x40,0x47,0xcc,0x00,0x01,0x10,0x1c]
+
+[0x04,0x40,0x47,0xcc,0x00,0x05,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x40,0x47,0xcc,0x00,0x05,0xca,0x1b]
+
+[0x04,0x40,0x47,0xcc,0x00,0x05,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], 1 ; encoding: [0x04,0x40,0x47,0xcc,0x00,0x05,0x06,0x1a]
+
+
+
+[0x04,0x40,0x49,0xcc,0x00,0x05,0x12,0x1c]
+# GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] ; encoding: [0x04,0x40,0x49,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0xc0,0x49,0xcc,0x00,0x05,0x12,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x48,0x49,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x49,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x60,0x49,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x49,0xcc,0x00,0x05,0x12,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x49,0xcc,0x00,0x05,0x12,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x00,0x49,0xcc,0x00,0x05,0x12,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x48,0x49,0xcc,0x00,0x05,0x12,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x49,0xcc,0x00,0x05,0x12,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x58,0x49,0xcc,0x00,0x05,0x12,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x49,0xcc,0x00,0x05,0x12,0x3c] # neg_lo:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x49,0xcc,0x00,0x05,0x12,0x5c] # neg_lo:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x49,0xcc,0x00,0x05,0x12,0x9c] # neg_lo:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x49,0xcc,0x00,0x05,0x12,0x9c]
+
+[0x04,0x41,0x49,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x42,0x49,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x44,0x49,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x49,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0x40,0x49,0xcc,0x00,0x04,0x12,0x1c] # sgpr src0
+# GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[4:11], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[2:3], v[4:11] ; encoding: [0x04,0x40,0x49,0xcc,0x00,0x04,0x12,0x1c]
+
+[0x04,0x40,0x49,0xcc,0x00,0x05,0x10,0x1c] # sgpr src1
+# GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], s[2:3]/*Invalid register, operand has 'VReg_64' register class*/, v[4:11] ; encoding: [0x04,0x40,0x49,0xcc,0x00,0x05,0x10,0x1c]
+
+[0x04,0x40,0x49,0xcc,0x00,0x05,0x12,0x18] # sgpr src2
+# GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], s[4:11]/*Invalid register, operand has 'VReg_256' register class*/ ; encoding: [0x04,0x40,0x49,0xcc,0x00,0x05,0x12,0x18]
+
+[0x04,0x40,0x49,0xcc,0xf2,0x04,0x12,0x1c] # 1.0 src0
+# GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[4:11], 1.0/*Invalid immediate*/, v[2:3], v[4:11] ; encoding: [0x04,0x40,0x49,0xcc,0x00,0x04,0x12,0x1c]
+
+[0x04,0x40,0x49,0xcc,0x00,0xe5,0x11,0x1c] # 1.0 src1
+# GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], 1.0/*Invalid immediate*/, v[4:11] ; encoding: [0x04,0x40,0x49,0xcc,0x00,0x01,0x10,0x1c]
+
+[0x04,0x40,0x49,0xcc,0x00,0x05,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x40,0x49,0xcc,0x00,0x05,0xca,0x1b]
+
+[0x04,0x40,0x49,0xcc,0x00,0x05,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], 1 ; encoding: [0x04,0x40,0x49,0xcc,0x00,0x05,0x06,0x1a]
+
+
+
+[0x04,0x40,0x4a,0xcc,0x00,0x05,0x12,0x1c]
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] ; encoding: [0x04,0x40,0x4a,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0xc0,0x4a,0xcc,0x00,0x05,0x12,0x1c] # clamp
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] clamp ; encoding: [0x04,0xc0,0x4a,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0x48,0x4a,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x4a,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x60,0x4a,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x4a,0xcc,0x00,0x05,0x12,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x4a,0xcc,0x00,0x05,0x12,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x00,0x4a,0xcc,0x00,0x05,0x12,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x48,0x4a,0xcc,0x00,0x05,0x12,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x4a,0xcc,0x00,0x05,0x12,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x58,0x4a,0xcc,0x00,0x05,0x12,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x4a,0xcc,0x00,0x05,0x12,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0] ; encoding: [0x04,0x40,0x4a,0xcc,0x00,0x05,0x12,0x3c]
+
+[0x04,0x40,0x4a,0xcc,0x00,0x05,0x12,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0] ; encoding: [0x04,0x40,0x4a,0xcc,0x00,0x05,0x12,0x5c]
+
+[0x04,0x40,0x4a,0xcc,0x00,0x05,0x12,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x41,0x4a,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x42,0x4a,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x44,0x4a,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x4a,0xcc,0x00,0x04,0x12,0x1c] # sgpr src0
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[4:11], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[2:3], v[4:11] ; encoding: [0x04,0x40,0x4a,0xcc,0x00,0x04,0x12,0x1c]
+
+[0x04,0x40,0x4a,0xcc,0x00,0x05,0x10,0x1c] # sgpr src1
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], s[2:3]/*Invalid register, operand has 'VReg_64' register class*/, v[4:11] ; encoding: [0x04,0x40,0x4a,0xcc,0x00,0x05,0x10,0x1c]
+
+[0x04,0x40,0x4a,0xcc,0x00,0x05,0x12,0x18] # sgpr src2
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], s[4:11]/*Invalid register, operand has 'VReg_256' register class*/ ; encoding: [0x04,0x40,0x4a,0xcc,0x00,0x05,0x12,0x18]
+
+[0x04,0x40,0x4a,0xcc,0x81,0x04,0x12,0x1c] # 1 src0
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[4:11], 1/*Invalid immediate*/, v[2:3], v[4:11] ; encoding: [0x04,0x40,0x4a,0xcc,0x01,0x04,0x12,0x1c]
+
+[0x04,0x40,0x4a,0xcc,0x00,0x03,0x11,0x1c] # 1 src1
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], 1/*Invalid immediate*/, v[4:11] ; encoding: [0x04,0x40,0x4a,0xcc,0x00,0x03,0x10,0x1c]
+
+[0x04,0x40,0x4a,0xcc,0x00,0x05,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], 1 ; encoding: [0x04,0x40,0x4a,0xcc,0x00,0x05,0x06,0x1a]
+
+[0x04,0x40,0x4a,0xcc,0x00,0x05,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x40,0x4a,0xcc,0x00,0x05,0xca,0x1b]
+
+
+
+[0x0c,0x40,0x50,0xcc,0x00,0x09,0x52,0x1c]
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 ; encoding: [0x0c,0x40,0x50,0xcc,0x00,0x09,0x52,0x1c]
+
+[0x0c,0xc0,0x50,0xcc,0x00,0x09,0x52,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x48,0x50,0xcc,0x00,0x09,0x52,0x1c] # op_sel:[1,0,0]
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 index_key:1 ; encoding: [0x0c,0x48,0x50,0xcc,0x00,0x09,0x52,0x1c]
+
+[0x0c,0x50,0x50,0xcc,0x00,0x09,0x52,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x60,0x50,0xcc,0x00,0x09,0x52,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x40,0x50,0xcc,0x00,0x09,0x52,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x40,0x50,0xcc,0x00,0x09,0x52,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x00,0x50,0xcc,0x00,0x09,0x52,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x48,0x50,0xcc,0x00,0x09,0x52,0x1c] # index_key:1
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 index_key:1 ; encoding: [0x0c,0x48,0x50,0xcc,0x00,0x09,0x52,0x1c]
+
+[0x0c,0x50,0x50,0xcc,0x00,0x09,0x52,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x58,0x50,0xcc,0x00,0x09,0x52,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x40,0x50,0xcc,0x00,0x09,0x52,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[1,0,0] ; encoding: [0x0c,0x40,0x50,0xcc,0x00,0x09,0x52,0x3c]
+
+[0x0c,0x40,0x50,0xcc,0x00,0x09,0x52,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[0,1,0] ; encoding: [0x0c,0x40,0x50,0xcc,0x00,0x09,0x52,0x5c]
+
+[0x0c,0x40,0x50,0xcc,0x00,0x09,0x52,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x41,0x50,0xcc,0x00,0x09,0x52,0x1c] # neg_hi:[1,0,0]
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_hi:[1,0,0] ; encoding: [0x0c,0x41,0x50,0xcc,0x00,0x09,0x52,0x1c]
+
+[0x0c,0x42,0x50,0xcc,0x00,0x09,0x52,0x1c] # neg_hi:[0,1,0]
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20 neg_hi:[0,1,0] ; encoding: [0x0c,0x42,0x50,0xcc,0x00,0x09,0x52,0x1c]
+
+[0x0c,0x44,0x50,0xcc,0x00,0x09,0x52,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x40,0x50,0xcc,0x00,0x08,0x52,0x1c] # sgpr src0
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[12:19], s[0:3]/*Invalid register, operand has 'VReg_128' register class*/, v[4:11], v20 ; encoding: [0x0c,0x40,0x50,0xcc,0x00,0x08,0x52,0x1c]
+
+[0x0c,0x40,0x50,0xcc,0x00,0x09,0x50,0x1c] # sgpr src1
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], s[4:11]/*Invalid register, operand has 'VReg_256' register class*/, v20 ; encoding: [0x0c,0x40,0x50,0xcc,0x00,0x09,0x50,0x1c]
+
+[0x0c,0x40,0x50,0xcc,0x00,0x09,0x52,0x18] # sgpr src2
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], s20/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x0c,0x40,0x50,0xcc,0x00,0x09,0x52,0x18]
+
+[0x0c,0x40,0x50,0xcc,0xf2,0x08,0x52,0x1c] # 1.0 src0
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[12:19], 1.0/*Invalid immediate*/, v[4:11], v20 ; encoding: [0x0c,0x40,0x50,0xcc,0x00,0x08,0x52,0x1c]
+
+[0x0c,0x40,0x50,0xcc,0x00,0xe5,0x51,0x1c] # 1.0 src1
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], 1.0/*Invalid immediate*/, v20 ; encoding: [0x0c,0x40,0x50,0xcc,0x00,0x01,0x50,0x1c]
+
+[0x0c,0x40,0x50,0xcc,0x00,0x09,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], 1.0/*Invalid immediate*/ ; encoding: [0x0c,0x40,0x50,0xcc,0x00,0x09,0x02,0x18]
+
+[0x0c,0x40,0x50,0xcc,0x00,0x09,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], 1/*Invalid immediate*/ ; encoding: [0x0c,0x40,0x50,0xcc,0x00,0x09,0x06,0x18]
+
+
+
+[0x0c,0x40,0x51,0xcc,0x00,0x09,0x52,0x1c]
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 ; encoding: [0x0c,0x40,0x51,0xcc,0x00,0x09,0x52,0x1c]
+
+[0x0c,0xc0,0x51,0xcc,0x00,0x09,0x52,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x48,0x51,0xcc,0x00,0x09,0x52,0x1c] # op_sel:[1,0,0]
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 index_key:1 ; encoding: [0x0c,0x48,0x51,0xcc,0x00,0x09,0x52,0x1c]
+
+[0x0c,0x50,0x51,0xcc,0x00,0x09,0x52,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x60,0x51,0xcc,0x00,0x09,0x52,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x40,0x51,0xcc,0x00,0x09,0x52,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x40,0x51,0xcc,0x00,0x09,0x52,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x00,0x51,0xcc,0x00,0x09,0x52,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x48,0x51,0xcc,0x00,0x09,0x52,0x1c] # index_key:1
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 index_key:1 ; encoding: [0x0c,0x48,0x51,0xcc,0x00,0x09,0x52,0x1c]
+
+[0x0c,0x50,0x51,0xcc,0x00,0x09,0x52,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x58,0x51,0xcc,0x00,0x09,0x52,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x40,0x51,0xcc,0x00,0x09,0x52,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[1,0,0] ; encoding: [0x0c,0x40,0x51,0xcc,0x00,0x09,0x52,0x3c]
+
+[0x0c,0x40,0x51,0xcc,0x00,0x09,0x52,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 neg_lo:[0,1,0] ; encoding: [0x0c,0x40,0x51,0xcc,0x00,0x09,0x52,0x5c]
+
+[0x0c,0x40,0x51,0xcc,0x00,0x09,0x52,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x41,0x51,0xcc,0x00,0x09,0x52,0x1c] # neg_hi:[1,0,0]
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 neg_hi:[1,0,0] ; encoding: [0x0c,0x41,0x51,0xcc,0x00,0x09,0x52,0x1c]
+
+[0x0c,0x42,0x51,0xcc,0x00,0x09,0x52,0x1c] # neg_hi:[0,1,0]
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20 neg_hi:[0,1,0] ; encoding: [0x0c,0x42,0x51,0xcc,0x00,0x09,0x52,0x1c]
+
+[0x0c,0x44,0x51,0xcc,0x00,0x09,0x52,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x40,0x51,0xcc,0x00,0x08,0x52,0x1c] # sgpr src0
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[12:19], s[0:3]/*Invalid register, operand has 'VReg_128' register class*/, v[4:11], v20 ; encoding: [0x0c,0x40,0x51,0xcc,0x00,0x08,0x52,0x1c]
+
+[0x0c,0x40,0x51,0xcc,0x00,0x09,0x50,0x1c] # sgpr src1
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], s[4:11]/*Invalid register, operand has 'VReg_256' register class*/, v20 ; encoding: [0x0c,0x40,0x51,0xcc,0x00,0x09,0x50,0x1c]
+
+[0x0c,0x40,0x51,0xcc,0x00,0x09,0x52,0x18] # sgpr src2
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], s20/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x0c,0x40,0x51,0xcc,0x00,0x09,0x52,0x18]
+
+[0x0c,0x40,0x51,0xcc,0xf2,0x08,0x52,0x1c] # 1.0 src0
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[12:19], 1.0/*Invalid immediate*/, v[4:11], v20 ; encoding: [0x0c,0x40,0x51,0xcc,0x00,0x08,0x52,0x1c]
+
+[0x0c,0x40,0x51,0xcc,0x00,0xe5,0x51,0x1c] # 1.0 src1
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], 1.0/*Invalid immediate*/, v20 ; encoding: [0x0c,0x40,0x51,0xcc,0x00,0x01,0x50,0x1c]
+
+[0x0c,0x40,0x51,0xcc,0x00,0x09,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], 1.0/*Invalid immediate*/ ; encoding: [0x0c,0x40,0x51,0xcc,0x00,0x09,0x02,0x18]
+
+[0x0c,0x40,0x51,0xcc,0x00,0x09,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], 1/*Invalid immediate*/ ; encoding: [0x0c,0x40,0x51,0xcc,0x00,0x09,0x06,0x18]
+
+
+
+[0x0c,0x40,0x52,0xcc,0x00,0x09,0x42,0x1c]
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 ; encoding: [0x0c,0x40,0x52,0xcc,0x00,0x09,0x42,0x1c]
+
+[0x0c,0xc0,0x52,0xcc,0x00,0x09,0x42,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x48,0x52,0xcc,0x00,0x09,0x42,0x1c] # op_sel:[1,0,0]
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 index_key:1 ; encoding: [0x0c,0x48,0x52,0xcc,0x00,0x09,0x42,0x1c]
+
+[0x0c,0x50,0x52,0xcc,0x00,0x09,0x42,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x60,0x52,0xcc,0x00,0x09,0x42,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x40,0x52,0xcc,0x00,0x09,0x42,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x40,0x52,0xcc,0x00,0x09,0x42,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x00,0x52,0xcc,0x00,0x09,0x42,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x48,0x52,0xcc,0x00,0x09,0x42,0x1c] # index_key:1
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 index_key:1 ; encoding: [0x0c,0x48,0x52,0xcc,0x00,0x09,0x42,0x1c]
+
+[0x0c,0x50,0x52,0xcc,0x00,0x09,0x42,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x58,0x52,0xcc,0x00,0x09,0x42,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x40,0x52,0xcc,0x00,0x09,0x42,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[1,0,0] ; encoding: [0x0c,0x40,0x52,0xcc,0x00,0x09,0x42,0x3c]
+
+[0x0c,0x40,0x52,0xcc,0x00,0x09,0x42,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[0,1,0] ; encoding: [0x0c,0x40,0x52,0xcc,0x00,0x09,0x42,0x5c]
+
+[0x0c,0x40,0x52,0xcc,0x00,0x09,0x42,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x41,0x52,0xcc,0x00,0x09,0x42,0x1c] # neg_hi:[1,0,0]
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_hi:[1,0,0] ; encoding: [0x0c,0x41,0x52,0xcc,0x00,0x09,0x42,0x1c]
+
+[0x0c,0x42,0x52,0xcc,0x00,0x09,0x42,0x1c] # neg_hi:[0,1,0]
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16 neg_hi:[0,1,0] ; encoding: [0x0c,0x42,0x52,0xcc,0x00,0x09,0x42,0x1c]
+
+[0x0c,0x44,0x52,0xcc,0x00,0x09,0x42,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x40,0x52,0xcc,0x00,0x08,0x42,0x1c] # sgpr src0
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[12:15], s[0:3]/*Invalid register, operand has 'VReg_128' register class*/, v[4:11], v16 ; encoding: [0x0c,0x40,0x52,0xcc,0x00,0x08,0x42,0x1c]
+
+[0x0c,0x40,0x52,0xcc,0x00,0x09,0x40,0x1c] # sgpr src1
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], s[4:11]/*Invalid register, operand has 'VReg_256' register class*/, v16 ; encoding: [0x0c,0x40,0x52,0xcc,0x00,0x09,0x40,0x1c]
+
+[0x0c,0x40,0x52,0xcc,0x00,0x09,0x42,0x18] # sgpr src2
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], s16/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x0c,0x40,0x52,0xcc,0x00,0x09,0x42,0x18]
+
+[0x0c,0x40,0x52,0xcc,0xf2,0x08,0x42,0x1c] # 1.0 src0
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[12:15], 1.0/*Invalid immediate*/, v[4:11], v16 ; encoding: [0x0c,0x40,0x52,0xcc,0x00,0x08,0x42,0x1c]
+
+[0x0c,0x40,0x52,0xcc,0x00,0xe5,0x41,0x1c] # 1.0 src1
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], 1.0/*Invalid immediate*/, v16 ; encoding: [0x0c,0x40,0x52,0xcc,0x00,0x01,0x40,0x1c]
+
+[0x0c,0x40,0x52,0xcc,0x00,0x09,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], 1.0/*Invalid immediate*/ ; encoding: [0x0c,0x40,0x52,0xcc,0x00,0x09,0x02,0x18]
+
+[0x0c,0x40,0x52,0xcc,0x00,0x09,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], 1/*Invalid immediate*/ ; encoding: [0x0c,0x40,0x52,0xcc,0x00,0x09,0x06,0x18]
+
+
+
+[0x0c,0x40,0x53,0xcc,0x00,0x09,0x42,0x1c]
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 ; encoding: [0x0c,0x40,0x53,0xcc,0x00,0x09,0x42,0x1c]
+
+[0x0c,0xc0,0x53,0xcc,0x00,0x09,0x42,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x48,0x53,0xcc,0x00,0x09,0x42,0x1c] # op_sel:[1,0,0]
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 index_key:1 ; encoding: [0x0c,0x48,0x53,0xcc,0x00,0x09,0x42,0x1c]
+
+[0x0c,0x50,0x53,0xcc,0x00,0x09,0x42,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x60,0x53,0xcc,0x00,0x09,0x42,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x40,0x53,0xcc,0x00,0x09,0x42,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x40,0x53,0xcc,0x00,0x09,0x42,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x00,0x53,0xcc,0x00,0x09,0x42,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x48,0x53,0xcc,0x00,0x09,0x42,0x1c] # index_key:1
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 index_key:1 ; encoding: [0x0c,0x48,0x53,0xcc,0x00,0x09,0x42,0x1c]
+
+[0x0c,0x50,0x53,0xcc,0x00,0x09,0x42,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x58,0x53,0xcc,0x00,0x09,0x42,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x40,0x53,0xcc,0x00,0x09,0x42,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[1,0,0] ; encoding: [0x0c,0x40,0x53,0xcc,0x00,0x09,0x42,0x3c]
+
+[0x0c,0x40,0x53,0xcc,0x00,0x09,0x42,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 neg_lo:[0,1,0] ; encoding: [0x0c,0x40,0x53,0xcc,0x00,0x09,0x42,0x5c]
+
+[0x0c,0x40,0x53,0xcc,0x00,0x09,0x42,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x41,0x53,0xcc,0x00,0x09,0x42,0x1c] # neg_hi:[1,0,0]
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 neg_hi:[1,0,0] ; encoding: [0x0c,0x41,0x53,0xcc,0x00,0x09,0x42,0x1c]
+
+[0x0c,0x42,0x53,0xcc,0x00,0x09,0x42,0x1c] # neg_hi:[0,1,0]
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16 neg_hi:[0,1,0] ; encoding: [0x0c,0x42,0x53,0xcc,0x00,0x09,0x42,0x1c]
+
+[0x0c,0x44,0x53,0xcc,0x00,0x09,0x42,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x0c,0x40,0x53,0xcc,0x00,0x08,0x42,0x1c] # sgpr src0
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[12:15], s[0:3]/*Invalid register, operand has 'VReg_128' register class*/, v[4:11], v16 ; encoding: [0x0c,0x40,0x53,0xcc,0x00,0x08,0x42,0x1c]
+
+[0x0c,0x40,0x53,0xcc,0x00,0x09,0x40,0x1c] # sgpr src1
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], s[4:11]/*Invalid register, operand has 'VReg_256' register class*/, v16 ; encoding: [0x0c,0x40,0x53,0xcc,0x00,0x09,0x40,0x1c]
+
+[0x0c,0x40,0x53,0xcc,0x00,0x09,0x42,0x18] # sgpr src2
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], s16/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x0c,0x40,0x53,0xcc,0x00,0x09,0x42,0x18]
+
+[0x0c,0x40,0x53,0xcc,0xf2,0x08,0x42,0x1c] # 1.0 src0
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[12:15], 1.0/*Invalid immediate*/, v[4:11], v16 ; encoding: [0x0c,0x40,0x53,0xcc,0x00,0x08,0x42,0x1c]
+
+[0x0c,0x40,0x53,0xcc,0x00,0xe5,0x41,0x1c] # 1.0 src1
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], 1.0/*Invalid immediate*/, v16 ; encoding: [0x0c,0x40,0x53,0xcc,0x00,0x01,0x40,0x1c]
+
+[0x0c,0x40,0x53,0xcc,0x00,0x09,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], 1.0/*Invalid immediate*/ ; encoding: [0x0c,0x40,0x53,0xcc,0x00,0x09,0x02,0x18]
+
+[0x0c,0x40,0x53,0xcc,0x00,0x09,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], 1/*Invalid immediate*/ ; encoding: [0x0c,0x40,0x53,0xcc,0x00,0x09,0x06,0x18]
+
+
+
+[0x06,0x40,0x54,0xcc,0x00,0x05,0x3a,0x1c]
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 ; encoding: [0x06,0x40,0x54,0xcc,0x00,0x05,0x3a,0x1c]
+
+[0x06,0xc0,0x54,0xcc,0x00,0x05,0x3a,0x1c] # clamp
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 clamp ; encoding: [0x06,0xc0,0x54,0xcc,0x00,0x05,0x3a,0x1c]
+
+[0x06,0x48,0x54,0xcc,0x00,0x05,0x3a,0x1c] # op_sel:[1,0,0]
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 index_key:1 ; encoding: [0x06,0x48,0x54,0xcc,0x00,0x05,0x3a,0x1c]
+
+[0x06,0x50,0x54,0xcc,0x00,0x05,0x3a,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x60,0x54,0xcc,0x00,0x05,0x3a,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x54,0xcc,0x00,0x05,0x3a,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x54,0xcc,0x00,0x05,0x3a,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x00,0x54,0xcc,0x00,0x05,0x3a,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x48,0x54,0xcc,0x00,0x05,0x3a,0x1c] # index_key:1
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 index_key:1 ; encoding: [0x06,0x48,0x54,0xcc,0x00,0x05,0x3a,0x1c]
+
+[0x06,0x50,0x54,0xcc,0x00,0x05,0x3a,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x58,0x54,0xcc,0x00,0x05,0x3a,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x54,0xcc,0x00,0x05,0x3a,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[1,0,0] ; encoding: [0x06,0x40,0x54,0xcc,0x00,0x05,0x3a,0x3c]
+
+[0x06,0x40,0x54,0xcc,0x00,0x05,0x3a,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,1,0] ; encoding: [0x06,0x40,0x54,0xcc,0x00,0x05,0x3a,0x5c]
+
+[0x06,0x40,0x54,0xcc,0x00,0x05,0x3a,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x41,0x54,0xcc,0x00,0x05,0x3a,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x42,0x54,0xcc,0x00,0x05,0x3a,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x44,0x54,0xcc,0x00,0x05,0x3a,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x54,0xcc,0x00,0x04,0x3a,0x1c] # sgpr src0
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[6:13], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[2:5], v14 ; encoding: [0x06,0x40,0x54,0xcc,0x00,0x04,0x3a,0x1c]
+
+[0x06,0x40,0x54,0xcc,0x00,0x05,0x38,0x1c] # sgpr src1
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], s[0:3]/*Invalid register, operand has 'VReg_128' register class*/, v14 ; encoding: [0x06,0x40,0x54,0xcc,0x00,0x01,0x38,0x1c]
+
+[0x06,0x40,0x54,0xcc,0x00,0x05,0x3a,0x18] # sgpr src2
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], s14/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x06,0x40,0x54,0xcc,0x00,0x05,0x3a,0x18]
+
+[0x06,0x40,0x54,0xcc,0x81,0x04,0x3a,0x1c] # 1 src0
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[6:13], 1/*Invalid immediate*/, v[2:5], v14 ; encoding: [0x06,0x40,0x54,0xcc,0x01,0x04,0x3a,0x1c]
+
+[0x06,0x40,0x54,0xcc,0x00,0x03,0x39,0x1c] # 1 src1
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], 1/*Invalid immediate*/, v14 ; encoding: [0x06,0x40,0x54,0xcc,0x00,0x03,0x38,0x1c]
+
+[0x06,0x40,0x54,0xcc,0x00,0x05,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], 1/*Invalid immediate*/ ; encoding: [0x06,0x40,0x54,0xcc,0x00,0x05,0x06,0x18]
+
+[0x06,0x40,0x54,0xcc,0x00,0x05,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], 1.0/*Invalid immediate*/ ; encoding: [0x06,0x40,0x54,0xcc,0x00,0x05,0x02,0x18]
+
+
+
+[0x03,0x40,0x55,0xcc,0x00,0x03,0x2e,0x1c]
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 ; encoding: [0x03,0x40,0x55,0xcc,0x00,0x03,0x2e,0x1c]
+
+[0x03,0xc0,0x55,0xcc,0x00,0x03,0x2e,0x1c] # clamp
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 clamp ; encoding: [0x03,0xc0,0x55,0xcc,0x00,0x03,0x2e,0x1c]
+
+[0x03,0x48,0x55,0xcc,0x00,0x03,0x2e,0x1c] # op_sel:[1,0,0]
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 index_key:1 ; encoding: [0x03,0x48,0x55,0xcc,0x00,0x03,0x2e,0x1c]
+
+[0x03,0x50,0x55,0xcc,0x00,0x03,0x2e,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x60,0x55,0xcc,0x00,0x03,0x2e,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x55,0xcc,0x00,0x03,0x2e,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x55,0xcc,0x00,0x03,0x2e,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x00,0x55,0xcc,0x00,0x03,0x2e,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x48,0x55,0xcc,0x00,0x03,0x2e,0x1c] # index_key:1
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 index_key:1 ; encoding: [0x03,0x48,0x55,0xcc,0x00,0x03,0x2e,0x1c]
+
+[0x03,0x50,0x55,0xcc,0x00,0x03,0x2e,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x58,0x55,0xcc,0x00,0x03,0x2e,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x55,0xcc,0x00,0x03,0x2e,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 neg_lo:[1,0,0] ; encoding: [0x03,0x40,0x55,0xcc,0x00,0x03,0x2e,0x3c]
+
+[0x03,0x40,0x55,0xcc,0x00,0x03,0x2e,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11 neg_lo:[0,1,0] ; encoding: [0x03,0x40,0x55,0xcc,0x00,0x03,0x2e,0x5c]
+
+[0x03,0x40,0x55,0xcc,0x00,0x03,0x2e,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x41,0x55,0xcc,0x00,0x03,0x2e,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x42,0x55,0xcc,0x00,0x03,0x2e,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x44,0x55,0xcc,0x00,0x03,0x2e,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x55,0xcc,0x00,0x02,0x2e,0x1c] # sgpr src0
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[3:10], s0/*Invalid register, operand has 'VGPR_32' register class*/, v[1:2], v11 ; encoding: [0x03,0x40,0x55,0xcc,0x00,0x02,0x2e,0x1c]
+
+[0x03,0x40,0x55,0xcc,0x00,0x03,0x2c,0x1c] # sgpr src1
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[3:10], v0, s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v11 ; encoding: [0x03,0x40,0x55,0xcc,0x00,0x01,0x2c,0x1c]
+
+[0x03,0x40,0x55,0xcc,0x00,0x03,0x2e,0x18] # sgpr src2
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], s11/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x03,0x40,0x55,0xcc,0x00,0x03,0x2e,0x18]
+
+[0x03,0x40,0x55,0xcc,0x81,0x02,0x2e,0x1c] # 1 src0
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[3:10], 1/*Invalid immediate*/, v[1:2], v11 ; encoding: [0x03,0x40,0x55,0xcc,0x01,0x02,0x2e,0x1c]
+
+[0x03,0x40,0x55,0xcc,0x00,0x03,0x2d,0x1c] # 1 src1
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[3:10], v0, 1/*Invalid immediate*/, v11 ; encoding: [0x03,0x40,0x55,0xcc,0x00,0x03,0x2c,0x1c]
+
+[0x03,0x40,0x55,0xcc,0x00,0x03,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], 1/*Invalid immediate*/ ; encoding: [0x03,0x40,0x55,0xcc,0x00,0x03,0x06,0x18]
+
+[0x03,0x40,0x55,0xcc,0x00,0x03,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], 1.0/*Invalid immediate*/ ; encoding: [0x03,0x40,0x55,0xcc,0x00,0x03,0x02,0x18]
+
+
+
+[0x06,0x40,0x56,0xcc,0x00,0x05,0x3a,0x1c]
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 ; encoding: [0x06,0x40,0x56,0xcc,0x00,0x05,0x3a,0x1c]
+
+[0x06,0xc0,0x56,0xcc,0x00,0x05,0x3a,0x1c] # clamp
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 clamp ; encoding: [0x06,0xc0,0x56,0xcc,0x00,0x05,0x3a,0x1c]
+
+[0x06,0x48,0x56,0xcc,0x00,0x05,0x3a,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x50,0x56,0xcc,0x00,0x05,0x3a,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x60,0x56,0xcc,0x00,0x05,0x3a,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x56,0xcc,0x00,0x05,0x3a,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x56,0xcc,0x00,0x05,0x3a,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x00,0x56,0xcc,0x00,0x05,0x3a,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x48,0x56,0xcc,0x00,0x05,0x3a,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x50,0x56,0xcc,0x00,0x05,0x3a,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x58,0x56,0xcc,0x00,0x05,0x3a,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x56,0xcc,0x00,0x05,0x3a,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 neg_lo:[1,0,0] ; encoding: [0x06,0x40,0x56,0xcc,0x00,0x05,0x3a,0x3c]
+
+[0x06,0x40,0x56,0xcc,0x00,0x05,0x3a,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14 neg_lo:[0,1,0] ; encoding: [0x06,0x40,0x56,0xcc,0x00,0x05,0x3a,0x5c]
+
+[0x06,0x40,0x56,0xcc,0x00,0x05,0x3a,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x41,0x56,0xcc,0x00,0x05,0x3a,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x42,0x56,0xcc,0x00,0x05,0x3a,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x44,0x56,0xcc,0x00,0x05,0x3a,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x56,0xcc,0x00,0x04,0x3a,0x1c] # sgpr src0
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[6:13], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[2:5], v14 ; encoding: [0x06,0x40,0x56,0xcc,0x00,0x04,0x3a,0x1c]
+
+[0x06,0x40,0x56,0xcc,0x00,0x05,0x38,0x1c] # sgpr src1
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], s[0:3]/*Invalid register, operand has 'VReg_128' register class*/, v14 ; encoding: [0x06,0x40,0x56,0xcc,0x00,0x01,0x38,0x1c]
+
+[0x06,0x40,0x56,0xcc,0x00,0x05,0x3a,0x18] # sgpr src2
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], s14/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x06,0x40,0x56,0xcc,0x00,0x05,0x3a,0x18]
+
+[0x06,0x40,0x56,0xcc,0x81,0x04,0x3a,0x1c] # 1 src0
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[6:13], 1/*Invalid immediate*/, v[2:5], v14 ; encoding: [0x06,0x40,0x56,0xcc,0x01,0x04,0x3a,0x1c]
+
+[0x06,0x40,0x56,0xcc,0x00,0x03,0x39,0x1c] # 1 src1
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], 1/*Invalid immediate*/, v14 ; encoding: [0x06,0x40,0x56,0xcc,0x00,0x03,0x38,0x1c]
+
+[0x06,0x40,0x56,0xcc,0x00,0x05,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], 1/*Invalid immediate*/ ; encoding: [0x06,0x40,0x56,0xcc,0x00,0x05,0x06,0x18]
+
+[0x06,0x40,0x56,0xcc,0x00,0x05,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], 1.0/*Invalid immediate*/ ; encoding: [0x06,0x40,0x56,0xcc,0x00,0x05,0x02,0x18]
+
+
+
+[0x06,0x40,0x57,0xcc,0x00,0x05,0x3a,0x1c]
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 ; encoding: [0x06,0x40,0x57,0xcc,0x00,0x05,0x3a,0x1c]
+
+[0x06,0xc0,0x57,0xcc,0x00,0x05,0x3a,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x48,0x57,0xcc,0x00,0x05,0x3a,0x1c] # op_sel:[1,0,0]
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 index_key:1 ; encoding: [0x06,0x48,0x57,0xcc,0x00,0x05,0x3a,0x1c]
+
+[0x06,0x50,0x57,0xcc,0x00,0x05,0x3a,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x60,0x57,0xcc,0x00,0x05,0x3a,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x57,0xcc,0x00,0x05,0x3a,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x57,0xcc,0x00,0x05,0x3a,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x00,0x57,0xcc,0x00,0x05,0x3a,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x48,0x57,0xcc,0x00,0x05,0x3a,0x1c] # index_key:1
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14 index_key:1 ; encoding: [0x06,0x48,0x57,0xcc,0x00,0x05,0x3a,0x1c]
+
+[0x06,0x50,0x57,0xcc,0x00,0x05,0x3a,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x58,0x57,0xcc,0x00,0x05,0x3a,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x57,0xcc,0x00,0x05,0x3a,0x3c] # neg_lo:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x57,0xcc,0x00,0x05,0x3a,0x5c] # neg_lo:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x57,0xcc,0x00,0x05,0x3a,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x41,0x57,0xcc,0x00,0x05,0x3a,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x42,0x57,0xcc,0x00,0x05,0x3a,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x44,0x57,0xcc,0x00,0x05,0x3a,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x57,0xcc,0x00,0x04,0x3a,0x1c] # sgpr src0
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[2:5], v14 ; encoding: [0x06,0x40,0x57,0xcc,0x00,0x04,0x3a,0x1c]
+
+[0x06,0x40,0x57,0xcc,0x00,0x05,0x38,0x1c] # sgpr src1
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], s[0:3]/*Invalid register, operand has 'VReg_128' register class*/, v14 ; encoding: [0x06,0x40,0x57,0xcc,0x00,0x01,0x38,0x1c]
+
+[0x06,0x40,0x57,0xcc,0x00,0x05,0x3a,0x18] # sgpr src2
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], s14/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x06,0x40,0x57,0xcc,0x00,0x05,0x3a,0x18]
+
+[0x06,0x40,0x57,0xcc,0xf2,0x04,0x3a,0x1c] # 1.0 src0
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], 1.0/*Invalid immediate*/, v[2:5], v14 ; encoding: [0x06,0x40,0x57,0xcc,0x00,0x04,0x3a,0x1c]
+
+[0x06,0x40,0x57,0xcc,0x00,0xe5,0x39,0x1c] # 1.0 src1
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], 1.0/*Invalid immediate*/, v14 ; encoding: [0x06,0x40,0x57,0xcc,0x00,0x01,0x38,0x1c]
+
+[0x06,0x40,0x57,0xcc,0x00,0x05,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], 1.0/*Invalid immediate*/ ; encoding: [0x06,0x40,0x57,0xcc,0x00,0x05,0x02,0x18]
+
+[0x06,0x40,0x57,0xcc,0x00,0x05,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], 1/*Invalid immediate*/ ; encoding: [0x06,0x40,0x57,0xcc,0x00,0x05,0x06,0x18]
+
+
+
+[0x06,0x40,0x58,0xcc,0x00,0x05,0x3a,0x1c]
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 ; encoding: [0x06,0x40,0x58,0xcc,0x00,0x05,0x3a,0x1c]
+
+[0x06,0xc0,0x58,0xcc,0x00,0x05,0x3a,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x48,0x58,0xcc,0x00,0x05,0x3a,0x1c] # op_sel:[1,0,0]
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 index_key:1 ; encoding: [0x06,0x48,0x58,0xcc,0x00,0x05,0x3a,0x1c]
+
+[0x06,0x50,0x58,0xcc,0x00,0x05,0x3a,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x60,0x58,0xcc,0x00,0x05,0x3a,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x58,0xcc,0x00,0x05,0x3a,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x58,0xcc,0x00,0x05,0x3a,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x00,0x58,0xcc,0x00,0x05,0x3a,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x48,0x58,0xcc,0x00,0x05,0x3a,0x1c] # index_key:1
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14 index_key:1 ; encoding: [0x06,0x48,0x58,0xcc,0x00,0x05,0x3a,0x1c]
+
+[0x06,0x50,0x58,0xcc,0x00,0x05,0x3a,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x58,0x58,0xcc,0x00,0x05,0x3a,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x58,0xcc,0x00,0x05,0x3a,0x3c] # neg_lo:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x58,0xcc,0x00,0x05,0x3a,0x5c] # neg_lo:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x58,0xcc,0x00,0x05,0x3a,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x41,0x58,0xcc,0x00,0x05,0x3a,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x42,0x58,0xcc,0x00,0x05,0x3a,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x44,0x58,0xcc,0x00,0x05,0x3a,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x58,0xcc,0x00,0x04,0x3a,0x1c] # sgpr src0
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[2:5], v14 ; encoding: [0x06,0x40,0x58,0xcc,0x00,0x04,0x3a,0x1c]
+
+[0x06,0x40,0x58,0xcc,0x00,0x05,0x38,0x1c] # sgpr src1
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], s[0:3]/*Invalid register, operand has 'VReg_128' register class*/, v14 ; encoding: [0x06,0x40,0x58,0xcc,0x00,0x01,0x38,0x1c]
+
+[0x06,0x40,0x58,0xcc,0x00,0x05,0x3a,0x18] # sgpr src2
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], s14/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x06,0x40,0x58,0xcc,0x00,0x05,0x3a,0x18]
+
+[0x06,0x40,0x58,0xcc,0xf2,0x04,0x3a,0x1c] # 1.0 src0
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], 1.0/*Invalid immediate*/, v[2:5], v14 ; encoding: [0x06,0x40,0x58,0xcc,0x00,0x04,0x3a,0x1c]
+
+[0x06,0x40,0x58,0xcc,0x00,0xe5,0x39,0x1c] # 1.0 src1
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], 1.0/*Invalid immediate*/, v14 ; encoding: [0x06,0x40,0x58,0xcc,0x00,0x01,0x38,0x1c]
+
+[0x06,0x40,0x58,0xcc,0x00,0x05,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], 1.0/*Invalid immediate*/ ; encoding: [0x06,0x40,0x58,0xcc,0x00,0x05,0x02,0x18]
+
+[0x06,0x40,0x58,0xcc,0x00,0x05,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], 1/*Invalid immediate*/ ; encoding: [0x06,0x40,0x58,0xcc,0x00,0x05,0x06,0x18]
+
+
+
+[0x06,0x40,0x59,0xcc,0x00,0x05,0x3a,0x1c]
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 ; encoding: [0x06,0x40,0x59,0xcc,0x00,0x05,0x3a,0x1c]
+
+[0x06,0xc0,0x59,0xcc,0x00,0x05,0x3a,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x48,0x59,0xcc,0x00,0x05,0x3a,0x1c] # op_sel:[1,0,0]
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 index_key:1 ; encoding: [0x06,0x48,0x59,0xcc,0x00,0x05,0x3a,0x1c]
+
+[0x06,0x50,0x59,0xcc,0x00,0x05,0x3a,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x60,0x59,0xcc,0x00,0x05,0x3a,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x59,0xcc,0x00,0x05,0x3a,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x59,0xcc,0x00,0x05,0x3a,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x00,0x59,0xcc,0x00,0x05,0x3a,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x48,0x59,0xcc,0x00,0x05,0x3a,0x1c] # index_key:1
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14 index_key:1 ; encoding: [0x06,0x48,0x59,0xcc,0x00,0x05,0x3a,0x1c]
+
+[0x06,0x50,0x59,0xcc,0x00,0x05,0x3a,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x58,0x59,0xcc,0x00,0x05,0x3a,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x59,0xcc,0x00,0x05,0x3a,0x3c] # neg_lo:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x59,0xcc,0x00,0x05,0x3a,0x5c] # neg_lo:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x59,0xcc,0x00,0x05,0x3a,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x41,0x59,0xcc,0x00,0x05,0x3a,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x42,0x59,0xcc,0x00,0x05,0x3a,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x44,0x59,0xcc,0x00,0x05,0x3a,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x59,0xcc,0x00,0x04,0x3a,0x1c] # sgpr src0
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[2:5], v14 ; encoding: [0x06,0x40,0x59,0xcc,0x00,0x04,0x3a,0x1c]
+
+[0x06,0x40,0x59,0xcc,0x00,0x05,0x38,0x1c] # sgpr src1
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], s[0:3]/*Invalid register, operand has 'VReg_128' register class*/, v14 ; encoding: [0x06,0x40,0x59,0xcc,0x00,0x01,0x38,0x1c]
+
+[0x06,0x40,0x59,0xcc,0x00,0x05,0x3a,0x18] # sgpr src2
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], s14/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x06,0x40,0x59,0xcc,0x00,0x05,0x3a,0x18]
+
+[0x06,0x40,0x59,0xcc,0xf2,0x04,0x3a,0x1c] # 1.0 src0
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], 1.0/*Invalid immediate*/, v[2:5], v14 ; encoding: [0x06,0x40,0x59,0xcc,0x00,0x04,0x3a,0x1c]
+
+[0x06,0x40,0x59,0xcc,0x00,0xe5,0x39,0x1c] # 1.0 src1
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], 1.0/*Invalid immediate*/, v14 ; encoding: [0x06,0x40,0x59,0xcc,0x00,0x01,0x38,0x1c]
+
+[0x06,0x40,0x59,0xcc,0x00,0x05,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], 1.0/*Invalid immediate*/ ; encoding: [0x06,0x40,0x59,0xcc,0x00,0x05,0x02,0x18]
+
+[0x06,0x40,0x59,0xcc,0x00,0x05,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], 1/*Invalid immediate*/ ; encoding: [0x06,0x40,0x59,0xcc,0x00,0x05,0x06,0x18]
+
+
+
+[0x06,0x40,0x5a,0xcc,0x00,0x05,0x3a,0x1c]
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 ; encoding: [0x06,0x40,0x5a,0xcc,0x00,0x05,0x3a,0x1c]
+
+[0x06,0xc0,0x5a,0xcc,0x00,0x05,0x3a,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x48,0x5a,0xcc,0x00,0x05,0x3a,0x1c] # op_sel:[1,0,0]
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 index_key:1 ; encoding: [0x06,0x48,0x5a,0xcc,0x00,0x05,0x3a,0x1c]
+
+[0x06,0x50,0x5a,0xcc,0x00,0x05,0x3a,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x60,0x5a,0xcc,0x00,0x05,0x3a,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x5a,0xcc,0x00,0x05,0x3a,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x5a,0xcc,0x00,0x05,0x3a,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x00,0x5a,0xcc,0x00,0x05,0x3a,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x48,0x5a,0xcc,0x00,0x05,0x3a,0x1c] # index_key:1
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14 index_key:1 ; encoding: [0x06,0x48,0x5a,0xcc,0x00,0x05,0x3a,0x1c]
+
+[0x06,0x50,0x5a,0xcc,0x00,0x05,0x3a,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x58,0x5a,0xcc,0x00,0x05,0x3a,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x5a,0xcc,0x00,0x05,0x3a,0x3c] # neg_lo:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x5a,0xcc,0x00,0x05,0x3a,0x5c] # neg_lo:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x5a,0xcc,0x00,0x05,0x3a,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x41,0x5a,0xcc,0x00,0x05,0x3a,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x42,0x5a,0xcc,0x00,0x05,0x3a,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x44,0x5a,0xcc,0x00,0x05,0x3a,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x5a,0xcc,0x00,0x04,0x3a,0x1c] # sgpr src0
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[2:5], v14 ; encoding: [0x06,0x40,0x5a,0xcc,0x00,0x04,0x3a,0x1c]
+
+[0x06,0x40,0x5a,0xcc,0x00,0x05,0x38,0x1c] # sgpr src1
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], s[0:3]/*Invalid register, operand has 'VReg_128' register class*/, v14 ; encoding: [0x06,0x40,0x5a,0xcc,0x00,0x01,0x38,0x1c]
+
+[0x06,0x40,0x5a,0xcc,0x00,0x05,0x3a,0x18] # sgpr src2
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], s14/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x06,0x40,0x5a,0xcc,0x00,0x05,0x3a,0x18]
+
+[0x06,0x40,0x5a,0xcc,0xf2,0x04,0x3a,0x1c] # 1.0 src0
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], 1.0/*Invalid immediate*/, v[2:5], v14 ; encoding: [0x06,0x40,0x5a,0xcc,0x00,0x04,0x3a,0x1c]
+
+[0x06,0x40,0x5a,0xcc,0x00,0xe5,0x39,0x1c] # 1.0 src1
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], 1.0/*Invalid immediate*/, v14 ; encoding: [0x06,0x40,0x5a,0xcc,0x00,0x01,0x38,0x1c]
+
+[0x06,0x40,0x5a,0xcc,0x00,0x05,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], 1.0/*Invalid immediate*/ ; encoding: [0x06,0x40,0x5a,0xcc,0x00,0x05,0x02,0x18]
+
+[0x06,0x40,0x5a,0xcc,0x00,0x05,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], 1/*Invalid immediate*/ ; encoding: [0x06,0x40,0x5a,0xcc,0x00,0x05,0x06,0x18]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_wmma_w64.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_wmma_w64.txt
new file mode 100644
index 0000000000000..169fd20488e37
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_wmma_w64.txt
@@ -0,0 +1,1628 @@
+# RUN: not llvm-mc -disassemble -triple=amdgcn -mcpu=gfx1170 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX1170 %s
+# RUN: not llvm-mc -disassemble -triple=amdgcn -mcpu=gfx1170 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --implicit-check-not=warning: --check-prefix=GFX1170-ERR %s
+
+[0x04,0x40,0x40,0xcc,0x00,0x05,0x12,0x1c]
+# GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] ; encoding: [0x04,0x40,0x40,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0xc0,0x40,0xcc,0x00,0x05,0x12,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x48,0x40,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x40,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x60,0x40,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x40,0xcc,0x00,0x05,0x12,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x40,0xcc,0x00,0x05,0x12,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x00,0x40,0xcc,0x00,0x05,0x12,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x48,0x40,0xcc,0x00,0x05,0x12,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x40,0xcc,0x00,0x05,0x12,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x58,0x40,0xcc,0x00,0x05,0x12,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x40,0xcc,0x00,0x05,0x12,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,0,0] ; encoding: [0x04,0x40,0x40,0xcc,0x00,0x05,0x12,0x3c]
+
+[0x04,0x40,0x40,0xcc,0x00,0x05,0x12,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,1,0] ; encoding: [0x04,0x40,0x40,0xcc,0x00,0x05,0x12,0x5c]
+
+[0x04,0x40,0x40,0xcc,0x00,0x05,0x12,0x9c] # neg_lo:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x40,0xcc,0x00,0x05,0x12,0x9c]
+
+[0x04,0x41,0x40,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[1,0,0]
+# GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[1,0,0] ; encoding: [0x04,0x41,0x40,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0x42,0x40,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[0,1,0]
+# GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[0,1,0] ; encoding: [0x04,0x42,0x40,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0x44,0x40,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x40,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0x40,0x40,0xcc,0x00,0x04,0x12,0x1c] # sgpr_0 src0
+# GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[2:3], v[4:7] ; encoding: [0x04,0x40,0x40,0xcc,0x00,0x04,0x12,0x1c]
+
+[0x04,0x40,0x40,0xcc,0x00,0x01,0x10,0x1c] # sgpr_0 src1
+# GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[4:7] ; encoding: [0x04,0x40,0x40,0xcc,0x00,0x01,0x10,0x1c]
+
+[0x04,0x40,0x40,0xcc,0x00,0x05,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], s[0:3]/*Invalid register, operand has 'VReg_128' register class*/ ; encoding: [0x04,0x40,0x40,0xcc,0x00,0x05,0x02,0x18]
+
+[0x04,0x40,0x40,0xcc,0xf2,0x04,0x12,0x1c] # 1.0 src0
+# GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], 1.0/*Invalid immediate*/, v[2:3], v[4:7] ; encoding: [0x04,0x40,0x40,0xcc,0x00,0x04,0x12,0x1c]
+
+[0x04,0x40,0x40,0xcc,0x00,0xe5,0x11,0x1c] # 1.0 src1
+# GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], 1.0/*Invalid immediate*/, v[4:7] ; encoding: [0x04,0x40,0x40,0xcc,0x00,0x01,0x10,0x1c]
+
+[0x04,0x40,0x40,0xcc,0x00,0x05,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x40,0x40,0xcc,0x00,0x05,0xca,0x1b]
+
+[0x04,0x40,0x40,0xcc,0x00,0x05,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_f32_16x16x16_f16 v[4:7], v[0:1], v[2:3], 1 ; encoding: [0x04,0x40,0x40,0xcc,0x00,0x05,0x06,0x1a]
+
+
+
+[0x04,0x40,0x41,0xcc,0x00,0x05,0x12,0x1c]
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] ; encoding: [0x04,0x40,0x41,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0xc0,0x41,0xcc,0x00,0x05,0x12,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x48,0x41,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x41,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x60,0x41,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x41,0xcc,0x00,0x05,0x12,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x41,0xcc,0x00,0x05,0x12,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x00,0x41,0xcc,0x00,0x05,0x12,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x48,0x41,0xcc,0x00,0x05,0x12,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x41,0xcc,0x00,0x05,0x12,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x58,0x41,0xcc,0x00,0x05,0x12,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x41,0xcc,0x00,0x05,0x12,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,0,0] ; encoding: [0x04,0x40,0x41,0xcc,0x00,0x05,0x12,0x3c]
+
+[0x04,0x40,0x41,0xcc,0x00,0x05,0x12,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,1,0] ; encoding: [0x04,0x40,0x41,0xcc,0x00,0x05,0x12,0x5c]
+
+[0x04,0x40,0x41,0xcc,0x00,0x05,0x12,0x9c] # neg_lo:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x41,0xcc,0x00,0x05,0x12,0x9c]
+
+[0x04,0x41,0x41,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[1,0,0]
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[1,0,0] ; encoding: [0x04,0x41,0x41,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0x42,0x41,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[0,1,0]
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[0,1,0] ; encoding: [0x04,0x42,0x41,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0x44,0x41,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], v[4:7] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x41,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0x40,0x41,0xcc,0x00,0x04,0x12,0x1c] # sgpr_0 src0
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[2:3], v[4:7] ; encoding: [0x04,0x40,0x41,0xcc,0x00,0x04,0x12,0x1c]
+
+[0x04,0x40,0x41,0xcc,0x00,0x01,0x10,0x1c] # sgpr_0 src1
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[4:7] ; encoding: [0x04,0x40,0x41,0xcc,0x00,0x01,0x10,0x1c]
+
+[0x04,0x40,0x41,0xcc,0x00,0x05,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], s[0:3]/*Invalid register, operand has 'VReg_128' register class*/ ; encoding: [0x04,0x40,0x41,0xcc,0x00,0x05,0x02,0x18]
+
+[0x04,0x40,0x41,0xcc,0xf2,0x04,0x12,0x1c] # 1.0 src0
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], 1.0/*Invalid immediate*/, v[2:3], v[4:7] ; encoding: [0x04,0x40,0x41,0xcc,0x00,0x04,0x12,0x1c]
+
+[0x04,0x40,0x41,0xcc,0x00,0xe5,0x11,0x1c] # 1.0 src1
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], 1.0/*Invalid immediate*/, v[4:7] ; encoding: [0x04,0x40,0x41,0xcc,0x00,0x01,0x10,0x1c]
+
+[0x04,0x40,0x41,0xcc,0x00,0x05,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x40,0x41,0xcc,0x00,0x05,0xca,0x1b]
+
+[0x04,0x40,0x41,0xcc,0x00,0x05,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_f32_16x16x16_bf16 v[4:7], v[0:1], v[2:3], 1 ; encoding: [0x04,0x40,0x41,0xcc,0x00,0x05,0x06,0x1a]
+
+
+
+[0x04,0x40,0x42,0xcc,0x00,0x05,0x12,0x1c]
+# GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] ; encoding: [0x04,0x40,0x42,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0xc0,0x42,0xcc,0x00,0x05,0x12,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x48,0x42,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x42,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x60,0x42,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x42,0xcc,0x00,0x05,0x12,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x42,0xcc,0x00,0x05,0x12,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x00,0x42,0xcc,0x00,0x05,0x12,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x48,0x42,0xcc,0x00,0x05,0x12,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x42,0xcc,0x00,0x05,0x12,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x58,0x42,0xcc,0x00,0x05,0x12,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x42,0xcc,0x00,0x05,0x12,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[1,0,0] ; encoding: [0x04,0x40,0x42,0xcc,0x00,0x05,0x12,0x3c]
+
+[0x04,0x40,0x42,0xcc,0x00,0x05,0x12,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,1,0] ; encoding: [0x04,0x40,0x42,0xcc,0x00,0x05,0x12,0x5c]
+
+[0x04,0x40,0x42,0xcc,0x00,0x05,0x12,0x9c] # neg_lo:[0,0,1]
+# GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x42,0xcc,0x00,0x05,0x12,0x9c]
+
+[0x04,0x41,0x42,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[1,0,0]
+# GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[1,0,0] ; encoding: [0x04,0x41,0x42,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0x42,0x42,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[0,1,0]
+# GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[0,1,0] ; encoding: [0x04,0x42,0x42,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0x44,0x42,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[0,0,1]
+# GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x42,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0x40,0x42,0xcc,0x00,0x04,0x12,0x1c] # sgpr_0 src0
+# GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[2:3], v[4:5] ; encoding: [0x04,0x40,0x42,0xcc,0x00,0x04,0x12,0x1c]
+
+[0x04,0x40,0x42,0xcc,0x00,0x01,0x10,0x1c] # sgpr_0 src1
+# GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[4:5] ; encoding: [0x04,0x40,0x42,0xcc,0x00,0x01,0x10,0x1c]
+
+[0x04,0x40,0x42,0xcc,0x00,0x05,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/ ; encoding: [0x04,0x40,0x42,0xcc,0x00,0x05,0x02,0x18]
+
+[0x04,0x40,0x42,0xcc,0xf2,0x04,0x12,0x1c] # 1.0 src0
+# GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], 1.0/*Invalid immediate*/, v[2:3], v[4:5] ; encoding: [0x04,0x40,0x42,0xcc,0x00,0x04,0x12,0x1c]
+
+[0x04,0x40,0x42,0xcc,0x00,0xe5,0x11,0x1c] # 1.0 src1
+# GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], 1.0/*Invalid immediate*/, v[4:5] ; encoding: [0x04,0x40,0x42,0xcc,0x00,0x01,0x10,0x1c]
+
+[0x04,0x40,0x42,0xcc,0x00,0x05,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x40,0x42,0xcc,0x00,0x05,0xca,0x1b]
+
+[0x04,0x40,0x42,0xcc,0x00,0x05,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_f16_16x16x16_f16 v[4:5], v[0:1], v[2:3], 1 ; encoding: [0x04,0x40,0x42,0xcc,0x00,0x05,0x06,0x1a]
+
+
+
+[0x04,0x40,0x43,0xcc,0x00,0x05,0x12,0x1c]
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] ; encoding: [0x04,0x40,0x43,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0xc0,0x43,0xcc,0x00,0x05,0x12,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x48,0x43,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x43,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x60,0x43,0xcc,0x00,0x05,0x12,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x43,0xcc,0x00,0x05,0x12,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x43,0xcc,0x00,0x05,0x12,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x00,0x43,0xcc,0x00,0x05,0x12,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x48,0x43,0xcc,0x00,0x05,0x12,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x50,0x43,0xcc,0x00,0x05,0x12,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x58,0x43,0xcc,0x00,0x05,0x12,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x04,0x40,0x43,0xcc,0x00,0x05,0x12,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[1,0,0] ; encoding: [0x04,0x40,0x43,0xcc,0x00,0x05,0x12,0x3c]
+
+[0x04,0x40,0x43,0xcc,0x00,0x05,0x12,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,1,0] ; encoding: [0x04,0x40,0x43,0xcc,0x00,0x05,0x12,0x5c]
+
+[0x04,0x40,0x43,0xcc,0x00,0x05,0x12,0x9c] # neg_lo:[0,0,1]
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] neg_lo:[0,0,1] ; encoding: [0x04,0x40,0x43,0xcc,0x00,0x05,0x12,0x9c]
+
+[0x04,0x41,0x43,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[1,0,0]
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[1,0,0] ; encoding: [0x04,0x41,0x43,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0x42,0x43,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[0,1,0]
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[0,1,0] ; encoding: [0x04,0x42,0x43,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0x44,0x43,0xcc,0x00,0x05,0x12,0x1c] # neg_hi:[0,0,1]
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], v[4:5] neg_hi:[0,0,1] ; encoding: [0x04,0x44,0x43,0xcc,0x00,0x05,0x12,0x1c]
+
+[0x04,0x40,0x43,0xcc,0x00,0x04,0x12,0x1c] # sgpr_0 src0
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[2:3], v[4:5] ; encoding: [0x04,0x40,0x43,0xcc,0x00,0x04,0x12,0x1c]
+
+[0x04,0x40,0x43,0xcc,0x00,0x01,0x10,0x1c] # sgpr_0 src1
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[4:5] ; encoding: [0x04,0x40,0x43,0xcc,0x00,0x01,0x10,0x1c]
+
+[0x04,0x40,0x43,0xcc,0x00,0x05,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/ ; encoding: [0x04,0x40,0x43,0xcc,0x00,0x05,0x02,0x18]
+
+[0x04,0x40,0x43,0xcc,0xf2,0x04,0x12,0x1c] # 1.0 src0
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], 1.0/*Invalid immediate*/, v[2:3], v[4:5] ; encoding: [0x04,0x40,0x43,0xcc,0x00,0x04,0x12,0x1c]
+
+[0x04,0x40,0x43,0xcc,0x00,0xe5,0x11,0x1c] # 1.0 src1
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], 1.0/*Invalid immediate*/, v[4:5] ; encoding: [0x04,0x40,0x43,0xcc,0x00,0x01,0x10,0x1c]
+
+[0x04,0x40,0x43,0xcc,0x00,0x05,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], 1.0 ; encoding: [0x04,0x40,0x43,0xcc,0x00,0x05,0xca,0x1b]
+
+[0x04,0x40,0x43,0xcc,0x00,0x05,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_bf16_16x16x16_bf16 v[4:5], v[0:1], v[2:3], 1 ; encoding: [0x04,0x40,0x43,0xcc,0x00,0x05,0x06,0x1a]
+
+
+
+[0x02,0x40,0x44,0xcc,0x00,0x03,0x0a,0x1c]
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] ; encoding: [0x02,0x40,0x44,0xcc,0x00,0x03,0x0a,0x1c]
+
+[0x02,0xc0,0x44,0xcc,0x00,0x03,0x0a,0x1c] # clamp
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] clamp ; encoding: [0x02,0xc0,0x44,0xcc,0x00,0x03,0x0a,0x1c]
+
+[0x02,0x48,0x44,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x50,0x44,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x60,0x44,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x44,0xcc,0x00,0x03,0x0a,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x44,0xcc,0x00,0x03,0x0a,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x00,0x44,0xcc,0x00,0x03,0x0a,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x48,0x44,0xcc,0x00,0x03,0x0a,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x50,0x44,0xcc,0x00,0x03,0x0a,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x58,0x44,0xcc,0x00,0x03,0x0a,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x44,0xcc,0x00,0x03,0x0a,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0] ; encoding: [0x02,0x40,0x44,0xcc,0x00,0x03,0x0a,0x3c]
+
+[0x02,0x40,0x44,0xcc,0x00,0x03,0x0a,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0] ; encoding: [0x02,0x40,0x44,0xcc,0x00,0x03,0x0a,0x5c]
+
+[0x02,0x40,0x44,0xcc,0x00,0x03,0x0a,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x41,0x44,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x42,0x44,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x44,0x44,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x44,0xcc,0x00,0x02,0x0a,0x1c] # sgpr_0 src0
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[2:5], s0/*Invalid register, operand has 'VGPR_32' register class*/, v1, v[2:5] ; encoding: [0x02,0x40,0x44,0xcc,0x00,0x02,0x0a,0x1c]
+
+[0x02,0x40,0x44,0xcc,0x00,0x01,0x08,0x1c] # sgpr_0 src1
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[2:5], v0, s0/*Invalid register, operand has 'VGPR_32' register class*/, v[2:5] ; encoding: [0x02,0x40,0x44,0xcc,0x00,0x01,0x08,0x1c]
+
+[0x02,0x40,0x44,0xcc,0x00,0x03,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, s[0:3]/*Invalid register, operand has 'VReg_128' register class*/ ; encoding: [0x02,0x40,0x44,0xcc,0x00,0x03,0x02,0x18]
+
+[0x02,0x40,0x44,0xcc,0x81,0x02,0x0a,0x1c] # 1 src0
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[2:5], 1/*Invalid immediate*/, v1, v[2:5] ; encoding: [0x02,0x40,0x44,0xcc,0x01,0x02,0x0a,0x1c]
+
+[0x02,0x40,0x44,0xcc,0x00,0x03,0x09,0x1c] # 1 src1
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[2:5], v0, 1/*Invalid immediate*/, v[2:5] ; encoding: [0x02,0x40,0x44,0xcc,0x00,0x03,0x08,0x1c]
+
+[0x02,0x40,0x44,0xcc,0x00,0x03,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, 1 ; encoding: [0x02,0x40,0x44,0xcc,0x00,0x03,0x06,0x1a]
+
+[0x02,0x40,0x44,0xcc,0x00,0x03,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_i32_16x16x16_iu8 v[2:5], v0, v1, 1.0 ; encoding: [0x02,0x40,0x44,0xcc,0x00,0x03,0xca,0x1b]
+
+
+
+[0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x1c]
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x1c]
+
+[0x02,0xc0,0x45,0xcc,0x00,0x03,0x0a,0x1c] # clamp
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] clamp ; encoding: [0x02,0xc0,0x45,0xcc,0x00,0x03,0x0a,0x1c]
+
+[0x02,0x48,0x45,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x50,0x45,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x60,0x45,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x00,0x45,0xcc,0x00,0x03,0x0a,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x48,0x45,0xcc,0x00,0x03,0x0a,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x50,0x45,0xcc,0x00,0x03,0x0a,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x58,0x45,0xcc,0x00,0x03,0x0a,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0] ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x3c]
+
+[0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0] ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x5c]
+
+[0x02,0x40,0x45,0xcc,0x00,0x03,0x0a,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x41,0x45,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x42,0x45,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x44,0x45,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x45,0xcc,0x00,0x02,0x0a,0x1c] # sgpr_0 src0
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:5], s0/*Invalid register, operand has 'VGPR_32' register class*/, v1, v[2:5] ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x02,0x0a,0x1c]
+
+[0x02,0x40,0x45,0xcc,0x00,0x01,0x08,0x1c] # sgpr_0 src1
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:5], v0, s0/*Invalid register, operand has 'VGPR_32' register class*/, v[2:5] ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x01,0x08,0x1c]
+
+[0x02,0x40,0x45,0xcc,0x00,0x03,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, s[0:3]/*Invalid register, operand has 'VReg_128' register class*/ ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0x02,0x18]
+
+[0x02,0x40,0x45,0xcc,0x81,0x02,0x0a,0x1c] # 1 src0
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:5], 1/*Invalid immediate*/, v1, v[2:5] ; encoding: [0x02,0x40,0x45,0xcc,0x01,0x02,0x0a,0x1c]
+
+[0x02,0x40,0x45,0xcc,0x00,0x03,0x09,0x1c] # 1 src1
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:5], v0, 1/*Invalid immediate*/, v[2:5] ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0x08,0x1c]
+
+[0x02,0x40,0x45,0xcc,0x00,0x03,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, 1 ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0x06,0x1a]
+
+[0x02,0x40,0x45,0xcc,0x00,0x03,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_i32_16x16x16_iu4 v[2:5], v0, v1, 1.0 ; encoding: [0x02,0x40,0x45,0xcc,0x00,0x03,0xca,0x1b]
+
+
+
+[0x02,0x40,0x46,0xcc,0x00,0x03,0x0a,0x1c]
+# GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] ; encoding: [0x02,0x40,0x46,0xcc,0x00,0x03,0x0a,0x1c]
+
+[0x02,0xc0,0x46,0xcc,0x00,0x03,0x0a,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x48,0x46,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x50,0x46,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x60,0x46,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x46,0xcc,0x00,0x03,0x0a,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x46,0xcc,0x00,0x03,0x0a,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x00,0x46,0xcc,0x00,0x03,0x0a,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x48,0x46,0xcc,0x00,0x03,0x0a,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x50,0x46,0xcc,0x00,0x03,0x0a,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x58,0x46,0xcc,0x00,0x03,0x0a,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x46,0xcc,0x00,0x03,0x0a,0x3c] # neg_lo:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x46,0xcc,0x00,0x03,0x0a,0x5c] # neg_lo:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x46,0xcc,0x00,0x03,0x0a,0x9c] # neg_lo:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1] ; encoding: [0x02,0x40,0x46,0xcc,0x00,0x03,0x0a,0x9c]
+
+[0x02,0x41,0x46,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x42,0x46,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x44,0x46,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1] ; encoding: [0x02,0x44,0x46,0xcc,0x00,0x03,0x0a,0x1c]
+
+[0x02,0x40,0x46,0xcc,0x00,0x02,0x0a,0x1c] # sgpr_0 src0
+# GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[2:5], s0/*Invalid register, operand has 'VGPR_32' register class*/, v1, v[2:5] ; encoding: [0x02,0x40,0x46,0xcc,0x00,0x02,0x0a,0x1c]
+
+[0x02,0x40,0x46,0xcc,0x00,0x01,0x08,0x1c] # sgpr_0 src1
+# GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, s0/*Invalid register, operand has 'VGPR_32' register class*/, v[2:5] ; encoding: [0x02,0x40,0x46,0xcc,0x00,0x01,0x08,0x1c]
+
+[0x02,0x40,0x46,0xcc,0x00,0x03,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, s[0:3]/*Invalid register, operand has 'VReg_128' register class*/ ; encoding: [0x02,0x40,0x46,0xcc,0x00,0x03,0x02,0x18]
+
+[0x02,0x40,0x46,0xcc,0xf2,0x02,0x0a,0x1c] # 1.0 src0
+# GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[2:5], 1.0/*Invalid immediate*/, v1, v[2:5] ; encoding: [0x02,0x40,0x46,0xcc,0x00,0x02,0x0a,0x1c]
+
+[0x02,0x40,0x46,0xcc,0x00,0xe5,0x09,0x1c] # 1.0 src1
+# GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, 1.0/*Invalid immediate*/, v[2:5] ; encoding: [0x02,0x40,0x46,0xcc,0x00,0x01,0x08,0x1c]
+
+[0x02,0x40,0x46,0xcc,0x00,0x03,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, 1.0 ; encoding: [0x02,0x40,0x46,0xcc,0x00,0x03,0xca,0x1b]
+
+[0x02,0x40,0x46,0xcc,0x00,0x03,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_f32_16x16x16_fp8_fp8 v[2:5], v0, v1, 1 ; encoding: [0x02,0x40,0x46,0xcc,0x00,0x03,0x06,0x1a]
+
+
+
+[0x02,0x40,0x47,0xcc,0x00,0x03,0x0a,0x1c]
+# GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] ; encoding: [0x02,0x40,0x47,0xcc,0x00,0x03,0x0a,0x1c]
+
+[0x02,0xc0,0x47,0xcc,0x00,0x03,0x0a,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x48,0x47,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x50,0x47,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x60,0x47,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x47,0xcc,0x00,0x03,0x0a,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x47,0xcc,0x00,0x03,0x0a,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x00,0x47,0xcc,0x00,0x03,0x0a,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x48,0x47,0xcc,0x00,0x03,0x0a,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x50,0x47,0xcc,0x00,0x03,0x0a,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x58,0x47,0xcc,0x00,0x03,0x0a,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x47,0xcc,0x00,0x03,0x0a,0x3c] # neg_lo:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x47,0xcc,0x00,0x03,0x0a,0x5c] # neg_lo:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x47,0xcc,0x00,0x03,0x0a,0x9c] # neg_lo:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1] ; encoding: [0x02,0x40,0x47,0xcc,0x00,0x03,0x0a,0x9c]
+
+[0x02,0x41,0x47,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x42,0x47,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x44,0x47,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1] ; encoding: [0x02,0x44,0x47,0xcc,0x00,0x03,0x0a,0x1c]
+
+[0x02,0x40,0x47,0xcc,0x00,0x02,0x0a,0x1c] # sgpr_0 src0
+# GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[2:5], s0/*Invalid register, operand has 'VGPR_32' register class*/, v1, v[2:5] ; encoding: [0x02,0x40,0x47,0xcc,0x00,0x02,0x0a,0x1c]
+
+[0x02,0x40,0x47,0xcc,0x00,0x01,0x08,0x1c] # sgpr_0 src1
+# GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, s0/*Invalid register, operand has 'VGPR_32' register class*/, v[2:5] ; encoding: [0x02,0x40,0x47,0xcc,0x00,0x01,0x08,0x1c]
+
+[0x02,0x40,0x47,0xcc,0x00,0x03,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, s[0:3]/*Invalid register, operand has 'VReg_128' register class*/ ; encoding: [0x02,0x40,0x47,0xcc,0x00,0x03,0x02,0x18]
+
+[0x02,0x40,0x47,0xcc,0xf2,0x02,0x0a,0x1c] # 1.0 src0
+# GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[2:5], 1.0/*Invalid immediate*/, v1, v[2:5] ; encoding: [0x02,0x40,0x47,0xcc,0x00,0x02,0x0a,0x1c]
+
+[0x02,0x40,0x47,0xcc,0x00,0xe5,0x09,0x1c] # 1.0 src1
+# GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, 1.0/*Invalid immediate*/, v[2:5] ; encoding: [0x02,0x40,0x47,0xcc,0x00,0x01,0x08,0x1c]
+
+[0x02,0x40,0x47,0xcc,0x00,0x03,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, 1.0 ; encoding: [0x02,0x40,0x47,0xcc,0x00,0x03,0xca,0x1b]
+
+[0x02,0x40,0x47,0xcc,0x00,0x03,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_f32_16x16x16_fp8_bf8 v[2:5], v0, v1, 1 ; encoding: [0x02,0x40,0x47,0xcc,0x00,0x03,0x06,0x1a]
+
+
+
+[0x02,0x40,0x48,0xcc,0x00,0x03,0x0a,0x1c]
+# GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] ; encoding: [0x02,0x40,0x48,0xcc,0x00,0x03,0x0a,0x1c]
+
+[0x02,0xc0,0x48,0xcc,0x00,0x03,0x0a,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x48,0x48,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x50,0x48,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x60,0x48,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x48,0xcc,0x00,0x03,0x0a,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x48,0xcc,0x00,0x03,0x0a,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x00,0x48,0xcc,0x00,0x03,0x0a,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x48,0x48,0xcc,0x00,0x03,0x0a,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x50,0x48,0xcc,0x00,0x03,0x0a,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x58,0x48,0xcc,0x00,0x03,0x0a,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x48,0xcc,0x00,0x03,0x0a,0x3c] # neg_lo:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x48,0xcc,0x00,0x03,0x0a,0x5c] # neg_lo:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x48,0xcc,0x00,0x03,0x0a,0x9c] # neg_lo:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1] ; encoding: [0x02,0x40,0x48,0xcc,0x00,0x03,0x0a,0x9c]
+
+[0x02,0x41,0x48,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x42,0x48,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x44,0x48,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1] ; encoding: [0x02,0x44,0x48,0xcc,0x00,0x03,0x0a,0x1c]
+
+[0x02,0x40,0x48,0xcc,0x00,0x02,0x0a,0x1c] # sgpr_0 src0
+# GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[2:5], s0/*Invalid register, operand has 'VGPR_32' register class*/, v1, v[2:5] ; encoding: [0x02,0x40,0x48,0xcc,0x00,0x02,0x0a,0x1c]
+
+[0x02,0x40,0x48,0xcc,0x00,0x01,0x08,0x1c] # sgpr_0 src1
+# GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, s0/*Invalid register, operand has 'VGPR_32' register class*/, v[2:5] ; encoding: [0x02,0x40,0x48,0xcc,0x00,0x01,0x08,0x1c]
+
+[0x02,0x40,0x48,0xcc,0x00,0x03,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, s[0:3]/*Invalid register, operand has 'VReg_128' register class*/ ; encoding: [0x02,0x40,0x48,0xcc,0x00,0x03,0x02,0x18]
+
+[0x02,0x40,0x48,0xcc,0xf2,0x02,0x0a,0x1c] # 1.0 src0
+# GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[2:5], 1.0/*Invalid immediate*/, v1, v[2:5] ; encoding: [0x02,0x40,0x48,0xcc,0x00,0x02,0x0a,0x1c]
+
+[0x02,0x40,0x48,0xcc,0x00,0xe5,0x09,0x1c] # 1.0 src1
+# GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, 1.0/*Invalid immediate*/, v[2:5] ; encoding: [0x02,0x40,0x48,0xcc,0x00,0x01,0x08,0x1c]
+
+[0x02,0x40,0x48,0xcc,0x00,0x03,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, 1.0 ; encoding: [0x02,0x40,0x48,0xcc,0x00,0x03,0xca,0x1b]
+
+[0x02,0x40,0x48,0xcc,0x00,0x03,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_f32_16x16x16_bf8_fp8 v[2:5], v0, v1, 1 ; encoding: [0x02,0x40,0x48,0xcc,0x00,0x03,0x06,0x1a]
+
+
+
+[0x02,0x40,0x49,0xcc,0x00,0x03,0x0a,0x1c]
+# GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] ; encoding: [0x02,0x40,0x49,0xcc,0x00,0x03,0x0a,0x1c]
+
+[0x02,0xc0,0x49,0xcc,0x00,0x03,0x0a,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x48,0x49,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x50,0x49,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x60,0x49,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x49,0xcc,0x00,0x03,0x0a,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x49,0xcc,0x00,0x03,0x0a,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x00,0x49,0xcc,0x00,0x03,0x0a,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x48,0x49,0xcc,0x00,0x03,0x0a,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x50,0x49,0xcc,0x00,0x03,0x0a,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x58,0x49,0xcc,0x00,0x03,0x0a,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x49,0xcc,0x00,0x03,0x0a,0x3c] # neg_lo:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x49,0xcc,0x00,0x03,0x0a,0x5c] # neg_lo:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x49,0xcc,0x00,0x03,0x0a,0x9c] # neg_lo:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] neg_lo:[0,0,1] ; encoding: [0x02,0x40,0x49,0xcc,0x00,0x03,0x0a,0x9c]
+
+[0x02,0x41,0x49,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x42,0x49,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x44,0x49,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[0,0,1]
+# GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, v[2:5] neg_hi:[0,0,1] ; encoding: [0x02,0x44,0x49,0xcc,0x00,0x03,0x0a,0x1c]
+
+[0x02,0x40,0x49,0xcc,0x00,0x02,0x0a,0x1c] # sgpr_0 src0
+# GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[2:5], s0/*Invalid register, operand has 'VGPR_32' register class*/, v1, v[2:5] ; encoding: [0x02,0x40,0x49,0xcc,0x00,0x02,0x0a,0x1c]
+
+[0x02,0x40,0x49,0xcc,0x00,0x01,0x08,0x1c] # sgpr_0 src1
+# GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, s0/*Invalid register, operand has 'VGPR_32' register class*/, v[2:5] ; encoding: [0x02,0x40,0x49,0xcc,0x00,0x01,0x08,0x1c]
+
+[0x02,0x40,0x49,0xcc,0x00,0x03,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, s[0:3]/*Invalid register, operand has 'VReg_128' register class*/ ; encoding: [0x02,0x40,0x49,0xcc,0x00,0x03,0x02,0x18]
+
+[0x02,0x40,0x49,0xcc,0xf2,0x02,0x0a,0x1c] # 1.0 src0
+# GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[2:5], 1.0/*Invalid immediate*/, v1, v[2:5] ; encoding: [0x02,0x40,0x49,0xcc,0x00,0x02,0x0a,0x1c]
+
+[0x02,0x40,0x49,0xcc,0x00,0xe5,0x09,0x1c] # 1.0 src1
+# GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, 1.0/*Invalid immediate*/, v[2:5] ; encoding: [0x02,0x40,0x49,0xcc,0x00,0x01,0x08,0x1c]
+
+[0x02,0x40,0x49,0xcc,0x00,0x03,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, 1.0 ; encoding: [0x02,0x40,0x49,0xcc,0x00,0x03,0xca,0x1b]
+
+[0x02,0x40,0x49,0xcc,0x00,0x03,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_f32_16x16x16_bf8_bf8 v[2:5], v0, v1, 1 ; encoding: [0x02,0x40,0x49,0xcc,0x00,0x03,0x06,0x1a]
+
+
+
+[0x02,0x40,0x4a,0xcc,0x00,0x03,0x0a,0x1c]
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] ; encoding: [0x02,0x40,0x4a,0xcc,0x00,0x03,0x0a,0x1c]
+
+[0x02,0xc0,0x4a,0xcc,0x00,0x03,0x0a,0x1c] # clamp
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] clamp ; encoding: [0x02,0xc0,0x4a,0xcc,0x00,0x03,0x0a,0x1c]
+
+[0x02,0x48,0x4a,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x50,0x4a,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x60,0x4a,0xcc,0x00,0x03,0x0a,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x4a,0xcc,0x00,0x03,0x0a,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x4a,0xcc,0x00,0x03,0x0a,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x00,0x4a,0xcc,0x00,0x03,0x0a,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x48,0x4a,0xcc,0x00,0x03,0x0a,0x1c] # index_key:1
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x50,0x4a,0xcc,0x00,0x03,0x0a,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x58,0x4a,0xcc,0x00,0x03,0x0a,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x4a,0xcc,0x00,0x03,0x0a,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[1,0,0] ; encoding: [0x02,0x40,0x4a,0xcc,0x00,0x03,0x0a,0x3c]
+
+[0x02,0x40,0x4a,0xcc,0x00,0x03,0x0a,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, v[2:5] neg_lo:[0,1,0] ; encoding: [0x02,0x40,0x4a,0xcc,0x00,0x03,0x0a,0x5c]
+
+[0x02,0x40,0x4a,0xcc,0x00,0x03,0x0a,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x41,0x4a,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x42,0x4a,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x44,0x4a,0xcc,0x00,0x03,0x0a,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x4a,0xcc,0x00,0x02,0x0a,0x1c] # sgpr_0 src0
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[2:5], s0/*Invalid register, operand has 'VGPR_32' register class*/, v1, v[2:5] ; encoding: [0x02,0x40,0x4a,0xcc,0x00,0x02,0x0a,0x1c]
+
+[0x02,0x40,0x4a,0xcc,0x00,0x01,0x08,0x1c] # sgpr_0 src1
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[2:5], v0, s0/*Invalid register, operand has 'VGPR_32' register class*/, v[2:5] ; encoding: [0x02,0x40,0x4a,0xcc,0x00,0x01,0x08,0x1c]
+
+[0x02,0x40,0x4a,0xcc,0x00,0x03,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, s[0:3]/*Invalid register, operand has 'VReg_128' register class*/ ; encoding: [0x02,0x40,0x4a,0xcc,0x00,0x03,0x02,0x18]
+
+[0x02,0x40,0x4a,0xcc,0x81,0x02,0x0a,0x1c] # 1 src0
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[2:5], 1/*Invalid immediate*/, v1, v[2:5] ; encoding: [0x02,0x40,0x4a,0xcc,0x01,0x02,0x0a,0x1c]
+
+[0x02,0x40,0x4a,0xcc,0x00,0x03,0x09,0x1c] # 1 src1
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[2:5], v0, 1/*Invalid immediate*/, v[2:5] ; encoding: [0x02,0x40,0x4a,0xcc,0x00,0x03,0x08,0x1c]
+
+[0x02,0x40,0x4a,0xcc,0x00,0x03,0x06,0x1a] # 1 src2
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, 1 ; encoding: [0x02,0x40,0x4a,0xcc,0x00,0x03,0x06,0x1a]
+
+[0x02,0x40,0x4a,0xcc,0x00,0x03,0xca,0x1b] # 1.0 src2
+# GFX1170: v_wmma_i32_16x16x32_iu4 v[2:5], v0, v1, 1.0 ; encoding: [0x02,0x40,0x4a,0xcc,0x00,0x03,0xca,0x1b]
+
+
+
+[0x06,0x40,0x50,0xcc,0x00,0x05,0x2a,0x1c]
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 ; encoding: [0x06,0x40,0x50,0xcc,0x00,0x05,0x2a,0x1c]
+
+[0x06,0xc0,0x50,0xcc,0x00,0x05,0x2a,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x48,0x50,0xcc,0x00,0x05,0x2a,0x1c] # op_sel:[1,0,0]
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 index_key:1 ; encoding: [0x06,0x48,0x50,0xcc,0x00,0x05,0x2a,0x1c]
+
+[0x06,0x50,0x50,0xcc,0x00,0x05,0x2a,0x1c] # op_sel:[0,1,0]
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 index_key:2 ; encoding: [0x06,0x50,0x50,0xcc,0x00,0x05,0x2a,0x1c]
+
+[0x06,0x60,0x50,0xcc,0x00,0x05,0x2a,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x50,0xcc,0x00,0x05,0x2a,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x50,0xcc,0x00,0x05,0x2a,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x00,0x50,0xcc,0x00,0x05,0x2a,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x48,0x50,0xcc,0x00,0x05,0x2a,0x1c] # index_key:1
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 index_key:1 ; encoding: [0x06,0x48,0x50,0xcc,0x00,0x05,0x2a,0x1c]
+
+[0x06,0x50,0x50,0xcc,0x00,0x05,0x2a,0x1c] # index_key:2
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 index_key:2 ; encoding: [0x06,0x50,0x50,0xcc,0x00,0x05,0x2a,0x1c]
+
+[0x06,0x58,0x50,0xcc,0x00,0x05,0x2a,0x1c] # index_key:3
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 index_key:3 ; encoding: [0x06,0x58,0x50,0xcc,0x00,0x05,0x2a,0x1c]
+
+[0x06,0x40,0x50,0xcc,0x00,0x05,0x2a,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[1,0,0] ; encoding: [0x06,0x40,0x50,0xcc,0x00,0x05,0x2a,0x3c]
+
+[0x06,0x40,0x50,0xcc,0x00,0x05,0x2a,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[0,1,0] ; encoding: [0x06,0x40,0x50,0xcc,0x00,0x05,0x2a,0x5c]
+
+[0x06,0x40,0x50,0xcc,0x00,0x05,0x2a,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x41,0x50,0xcc,0x00,0x05,0x2a,0x1c] # neg_hi:[1,0,0]
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_hi:[1,0,0] ; encoding: [0x06,0x41,0x50,0xcc,0x00,0x05,0x2a,0x1c]
+
+[0x06,0x42,0x50,0xcc,0x00,0x05,0x2a,0x1c] # neg_hi:[0,1,0]
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], v10 neg_hi:[0,1,0] ; encoding: [0x06,0x42,0x50,0xcc,0x00,0x05,0x2a,0x1c]
+
+[0x06,0x44,0x50,0xcc,0x00,0x05,0x2a,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x50,0xcc,0x00,0x04,0x2a,0x1c] # sgpr_0 src0
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[2:5], v10 ; encoding: [0x06,0x40,0x50,0xcc,0x00,0x04,0x2a,0x1c]
+
+[0x06,0x40,0x50,0xcc,0x00,0x01,0x28,0x1c] # sgpr_0 src1
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], s[0:3]/*Invalid register, operand has 'VReg_128' register class*/, v10 ; encoding: [0x06,0x40,0x50,0xcc,0x00,0x01,0x28,0x1c]
+
+[0x06,0x40,0x50,0xcc,0x00,0x05,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], s0/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x06,0x40,0x50,0xcc,0x00,0x05,0x02,0x18]
+
+[0x06,0x40,0x50,0xcc,0xf2,0x04,0x2a,0x1c] # 1.0 src0
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], 1.0/*Invalid immediate*/, v[2:5], v10 ; encoding: [0x06,0x40,0x50,0xcc,0x00,0x04,0x2a,0x1c]
+
+[0x06,0x40,0x50,0xcc,0x00,0xe5,0x29,0x1c] # 1.0 src1
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], 1.0/*Invalid immediate*/, v10 ; encoding: [0x06,0x40,0x50,0xcc,0x00,0x01,0x28,0x1c]
+
+[0x06,0x40,0x50,0xcc,0x00,0x05,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], 1.0/*Invalid immediate*/ ; encoding: [0x06,0x40,0x50,0xcc,0x00,0x05,0x02,0x18]
+
+[0x06,0x40,0x50,0xcc,0x00,0x05,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_f32_16x16x32_f16 v[6:9], v[0:1], v[2:5], 1/*Invalid immediate*/ ; encoding: [0x06,0x40,0x50,0xcc,0x00,0x05,0x06,0x18]
+
+
+
+[0x06,0x40,0x51,0xcc,0x00,0x05,0x2a,0x1c]
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 ; encoding: [0x06,0x40,0x51,0xcc,0x00,0x05,0x2a,0x1c]
+
+[0x06,0xc0,0x51,0xcc,0x00,0x05,0x2a,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x48,0x51,0xcc,0x00,0x05,0x2a,0x1c] # op_sel:[1,0,0]
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 index_key:1 ; encoding: [0x06,0x48,0x51,0xcc,0x00,0x05,0x2a,0x1c]
+
+[0x06,0x50,0x51,0xcc,0x00,0x05,0x2a,0x1c] # op_sel:[0,1,0]
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 index_key:2 ; encoding: [0x06,0x50,0x51,0xcc,0x00,0x05,0x2a,0x1c]
+
+[0x06,0x60,0x51,0xcc,0x00,0x05,0x2a,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x51,0xcc,0x00,0x05,0x2a,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x51,0xcc,0x00,0x05,0x2a,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x00,0x51,0xcc,0x00,0x05,0x2a,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x48,0x51,0xcc,0x00,0x05,0x2a,0x1c] # index_key:1
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 index_key:1 ; encoding: [0x06,0x48,0x51,0xcc,0x00,0x05,0x2a,0x1c]
+
+[0x06,0x50,0x51,0xcc,0x00,0x05,0x2a,0x1c] # index_key:2
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 index_key:2 ; encoding: [0x06,0x50,0x51,0xcc,0x00,0x05,0x2a,0x1c]
+
+[0x06,0x58,0x51,0xcc,0x00,0x05,0x2a,0x1c] # index_key:3
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 index_key:3 ; encoding: [0x06,0x58,0x51,0xcc,0x00,0x05,0x2a,0x1c]
+
+[0x06,0x40,0x51,0xcc,0x00,0x05,0x2a,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[1,0,0] ; encoding: [0x06,0x40,0x51,0xcc,0x00,0x05,0x2a,0x3c]
+
+[0x06,0x40,0x51,0xcc,0x00,0x05,0x2a,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 neg_lo:[0,1,0] ; encoding: [0x06,0x40,0x51,0xcc,0x00,0x05,0x2a,0x5c]
+
+[0x06,0x40,0x51,0xcc,0x00,0x05,0x2a,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x41,0x51,0xcc,0x00,0x05,0x2a,0x1c] # neg_hi:[1,0,0]
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 neg_hi:[1,0,0] ; encoding: [0x06,0x41,0x51,0xcc,0x00,0x05,0x2a,0x1c]
+
+[0x06,0x42,0x51,0xcc,0x00,0x05,0x2a,0x1c] # neg_hi:[0,1,0]
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], v10 neg_hi:[0,1,0] ; encoding: [0x06,0x42,0x51,0xcc,0x00,0x05,0x2a,0x1c]
+
+[0x06,0x44,0x51,0xcc,0x00,0x05,0x2a,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x51,0xcc,0x00,0x04,0x2a,0x1c] # sgpr_0 src0
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[2:5], v10 ; encoding: [0x06,0x40,0x51,0xcc,0x00,0x04,0x2a,0x1c]
+
+[0x06,0x40,0x51,0xcc,0x00,0x01,0x28,0x1c] # sgpr_0 src1
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], s[0:3]/*Invalid register, operand has 'VReg_128' register class*/, v10 ; encoding: [0x06,0x40,0x51,0xcc,0x00,0x01,0x28,0x1c]
+
+[0x06,0x40,0x51,0xcc,0x00,0x05,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], s0/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x06,0x40,0x51,0xcc,0x00,0x05,0x02,0x18]
+
+[0x06,0x40,0x51,0xcc,0xf2,0x04,0x2a,0x1c] # 1.0 src0
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], 1.0/*Invalid immediate*/, v[2:5], v10 ; encoding: [0x06,0x40,0x51,0xcc,0x00,0x04,0x2a,0x1c]
+
+[0x06,0x40,0x51,0xcc,0x00,0xe5,0x29,0x1c] # 1.0 src1
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], 1.0/*Invalid immediate*/, v10 ; encoding: [0x06,0x40,0x51,0xcc,0x00,0x01,0x28,0x1c]
+
+[0x06,0x40,0x51,0xcc,0x00,0x05,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], 1.0/*Invalid immediate*/ ; encoding: [0x06,0x40,0x51,0xcc,0x00,0x05,0x02,0x18]
+
+[0x06,0x40,0x51,0xcc,0x00,0x05,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_f32_16x16x32_bf16 v[6:9], v[0:1], v[2:5], 1/*Invalid immediate*/ ; encoding: [0x06,0x40,0x51,0xcc,0x00,0x05,0x06,0x18]
+
+
+
+[0x06,0x40,0x52,0xcc,0x00,0x05,0x22,0x1c]
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 ; encoding: [0x06,0x40,0x52,0xcc,0x00,0x05,0x22,0x1c]
+
+[0x06,0xc0,0x52,0xcc,0x00,0x05,0x22,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x48,0x52,0xcc,0x00,0x05,0x22,0x1c] # op_sel:[1,0,0]
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 index_key:1 ; encoding: [0x06,0x48,0x52,0xcc,0x00,0x05,0x22,0x1c]
+
+[0x06,0x50,0x52,0xcc,0x00,0x05,0x22,0x1c] # op_sel:[0,1,0]
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 index_key:2 ; encoding: [0x06,0x50,0x52,0xcc,0x00,0x05,0x22,0x1c]
+
+[0x06,0x60,0x52,0xcc,0x00,0x05,0x22,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x52,0xcc,0x00,0x05,0x22,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x52,0xcc,0x00,0x05,0x22,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x00,0x52,0xcc,0x00,0x05,0x22,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x48,0x52,0xcc,0x00,0x05,0x22,0x1c] # index_key:1
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 index_key:1 ; encoding: [0x06,0x48,0x52,0xcc,0x00,0x05,0x22,0x1c]
+
+[0x06,0x50,0x52,0xcc,0x00,0x05,0x22,0x1c] # index_key:2
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 index_key:2 ; encoding: [0x06,0x50,0x52,0xcc,0x00,0x05,0x22,0x1c]
+
+[0x06,0x58,0x52,0xcc,0x00,0x05,0x22,0x1c] # index_key:3
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 index_key:3 ; encoding: [0x06,0x58,0x52,0xcc,0x00,0x05,0x22,0x1c]
+
+[0x06,0x40,0x52,0xcc,0x00,0x05,0x22,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[1,0,0] ; encoding: [0x06,0x40,0x52,0xcc,0x00,0x05,0x22,0x3c]
+
+[0x06,0x40,0x52,0xcc,0x00,0x05,0x22,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[0,1,0] ; encoding: [0x06,0x40,0x52,0xcc,0x00,0x05,0x22,0x5c]
+
+[0x06,0x40,0x52,0xcc,0x00,0x05,0x22,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x41,0x52,0xcc,0x00,0x05,0x22,0x1c] # neg_hi:[1,0,0]
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_hi:[1,0,0] ; encoding: [0x06,0x41,0x52,0xcc,0x00,0x05,0x22,0x1c]
+
+[0x06,0x42,0x52,0xcc,0x00,0x05,0x22,0x1c] # neg_hi:[0,1,0]
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], v8 neg_hi:[0,1,0] ; encoding: [0x06,0x42,0x52,0xcc,0x00,0x05,0x22,0x1c]
+
+[0x06,0x44,0x52,0xcc,0x00,0x05,0x22,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x52,0xcc,0x00,0x04,0x22,0x1c] # sgpr_0 src0
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[2:5], v8 ; encoding: [0x06,0x40,0x52,0xcc,0x00,0x04,0x22,0x1c]
+
+[0x06,0x40,0x52,0xcc,0x00,0x01,0x20,0x1c] # sgpr_0 src1
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], s[0:3]/*Invalid register, operand has 'VReg_128' register class*/, v8 ; encoding: [0x06,0x40,0x52,0xcc,0x00,0x01,0x20,0x1c]
+
+[0x06,0x40,0x52,0xcc,0x00,0x05,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], s0/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x06,0x40,0x52,0xcc,0x00,0x05,0x02,0x18]
+
+[0x06,0x40,0x52,0xcc,0xf2,0x04,0x22,0x1c] # 1.0 src0
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], 1.0/*Invalid immediate*/, v[2:5], v8 ; encoding: [0x06,0x40,0x52,0xcc,0x00,0x04,0x22,0x1c]
+
+[0x06,0x40,0x52,0xcc,0x00,0xe5,0x21,0x1c] # 1.0 src1
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], 1.0/*Invalid immediate*/, v8 ; encoding: [0x06,0x40,0x52,0xcc,0x00,0x01,0x20,0x1c]
+
+[0x06,0x40,0x52,0xcc,0x00,0x05,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], 1.0/*Invalid immediate*/ ; encoding: [0x06,0x40,0x52,0xcc,0x00,0x05,0x02,0x18]
+
+[0x06,0x40,0x52,0xcc,0x00,0x05,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_f16_16x16x32_f16 v[6:7], v[0:1], v[2:5], 1/*Invalid immediate*/ ; encoding: [0x06,0x40,0x52,0xcc,0x00,0x05,0x06,0x18]
+
+
+
+[0x06,0x40,0x53,0xcc,0x00,0x05,0x22,0x1c]
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 ; encoding: [0x06,0x40,0x53,0xcc,0x00,0x05,0x22,0x1c]
+
+[0x06,0xc0,0x53,0xcc,0x00,0x05,0x22,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x48,0x53,0xcc,0x00,0x05,0x22,0x1c] # op_sel:[1,0,0]
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 index_key:1 ; encoding: [0x06,0x48,0x53,0xcc,0x00,0x05,0x22,0x1c]
+
+[0x06,0x50,0x53,0xcc,0x00,0x05,0x22,0x1c] # op_sel:[0,1,0]
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 index_key:2 ; encoding: [0x06,0x50,0x53,0xcc,0x00,0x05,0x22,0x1c]
+
+[0x06,0x60,0x53,0xcc,0x00,0x05,0x22,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x53,0xcc,0x00,0x05,0x22,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x53,0xcc,0x00,0x05,0x22,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x00,0x53,0xcc,0x00,0x05,0x22,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x48,0x53,0xcc,0x00,0x05,0x22,0x1c] # index_key:1
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 index_key:1 ; encoding: [0x06,0x48,0x53,0xcc,0x00,0x05,0x22,0x1c]
+
+[0x06,0x50,0x53,0xcc,0x00,0x05,0x22,0x1c] # index_key:2
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 index_key:2 ; encoding: [0x06,0x50,0x53,0xcc,0x00,0x05,0x22,0x1c]
+
+[0x06,0x58,0x53,0xcc,0x00,0x05,0x22,0x1c] # index_key:3
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 index_key:3 ; encoding: [0x06,0x58,0x53,0xcc,0x00,0x05,0x22,0x1c]
+
+[0x06,0x40,0x53,0xcc,0x00,0x05,0x22,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[1,0,0] ; encoding: [0x06,0x40,0x53,0xcc,0x00,0x05,0x22,0x3c]
+
+[0x06,0x40,0x53,0xcc,0x00,0x05,0x22,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 neg_lo:[0,1,0] ; encoding: [0x06,0x40,0x53,0xcc,0x00,0x05,0x22,0x5c]
+
+[0x06,0x40,0x53,0xcc,0x00,0x05,0x22,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x41,0x53,0xcc,0x00,0x05,0x22,0x1c] # neg_hi:[1,0,0]
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 neg_hi:[1,0,0] ; encoding: [0x06,0x41,0x53,0xcc,0x00,0x05,0x22,0x1c]
+
+[0x06,0x42,0x53,0xcc,0x00,0x05,0x22,0x1c] # neg_hi:[0,1,0]
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], v8 neg_hi:[0,1,0] ; encoding: [0x06,0x42,0x53,0xcc,0x00,0x05,0x22,0x1c]
+
+[0x06,0x44,0x53,0xcc,0x00,0x05,0x22,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x06,0x40,0x53,0xcc,0x00,0x04,0x22,0x1c] # sgpr_0 src0
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v[2:5], v8 ; encoding: [0x06,0x40,0x53,0xcc,0x00,0x04,0x22,0x1c]
+
+[0x06,0x40,0x53,0xcc,0x00,0x01,0x20,0x1c] # sgpr_0 src1
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], s[0:3]/*Invalid register, operand has 'VReg_128' register class*/, v8 ; encoding: [0x06,0x40,0x53,0xcc,0x00,0x01,0x20,0x1c]
+
+[0x06,0x40,0x53,0xcc,0x00,0x05,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], s0/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x06,0x40,0x53,0xcc,0x00,0x05,0x02,0x18]
+
+[0x06,0x40,0x53,0xcc,0xf2,0x04,0x22,0x1c] # 1.0 src0
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], 1.0/*Invalid immediate*/, v[2:5], v8 ; encoding: [0x06,0x40,0x53,0xcc,0x00,0x04,0x22,0x1c]
+
+[0x06,0x40,0x53,0xcc,0x00,0xe5,0x21,0x1c] # 1.0 src1
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], 1.0/*Invalid immediate*/, v8 ; encoding: [0x06,0x40,0x53,0xcc,0x00,0x01,0x20,0x1c]
+
+[0x06,0x40,0x53,0xcc,0x00,0x05,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], 1.0/*Invalid immediate*/ ; encoding: [0x06,0x40,0x53,0xcc,0x00,0x05,0x02,0x18]
+
+[0x06,0x40,0x53,0xcc,0x00,0x05,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_bf16_16x16x32_bf16 v[6:7], v[0:1], v[2:5], 1/*Invalid immediate*/ ; encoding: [0x06,0x40,0x53,0xcc,0x00,0x05,0x06,0x18]
+
+
+
+[0x03,0x40,0x54,0xcc,0x00,0x03,0x1e,0x1c]
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 ; encoding: [0x03,0x40,0x54,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0xc0,0x54,0xcc,0x00,0x03,0x1e,0x1c] # clamp
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 clamp ; encoding: [0x03,0xc0,0x54,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x48,0x54,0xcc,0x00,0x03,0x1e,0x1c] # op_sel:[1,0,0]
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 index_key:1 ; encoding: [0x03,0x48,0x54,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x50,0x54,0xcc,0x00,0x03,0x1e,0x1c] # op_sel:[0,1,0]
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 index_key:2 ; encoding: [0x03,0x50,0x54,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x60,0x54,0xcc,0x00,0x03,0x1e,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x54,0xcc,0x00,0x03,0x1e,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x54,0xcc,0x00,0x03,0x1e,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x00,0x54,0xcc,0x00,0x03,0x1e,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x48,0x54,0xcc,0x00,0x03,0x1e,0x1c] # index_key:1
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 index_key:1 ; encoding: [0x03,0x48,0x54,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x50,0x54,0xcc,0x00,0x03,0x1e,0x1c] # index_key:2
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 index_key:2 ; encoding: [0x03,0x50,0x54,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x58,0x54,0xcc,0x00,0x03,0x1e,0x1c] # index_key:3
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 index_key:3 ; encoding: [0x03,0x58,0x54,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x40,0x54,0xcc,0x00,0x03,0x1e,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 neg_lo:[1,0,0] ; encoding: [0x03,0x40,0x54,0xcc,0x00,0x03,0x1e,0x3c]
+
+[0x03,0x40,0x54,0xcc,0x00,0x03,0x1e,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], v7 neg_lo:[0,1,0] ; encoding: [0x03,0x40,0x54,0xcc,0x00,0x03,0x1e,0x5c]
+
+[0x03,0x40,0x54,0xcc,0x00,0x03,0x1e,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x41,0x54,0xcc,0x00,0x03,0x1e,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x42,0x54,0xcc,0x00,0x03,0x1e,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x44,0x54,0xcc,0x00,0x03,0x1e,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x54,0xcc,0x00,0x02,0x1e,0x1c] # sgpr_0 src0
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], s0/*Invalid register, operand has 'VGPR_32' register class*/, v[1:2], v7 ; encoding: [0x03,0x40,0x54,0xcc,0x00,0x02,0x1e,0x1c]
+
+[0x03,0x40,0x54,0xcc,0x00,0x01,0x1c,0x1c] # sgpr_0 src1
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v7 ; encoding: [0x03,0x40,0x54,0xcc,0x00,0x01,0x1c,0x1c]
+
+[0x03,0x40,0x54,0xcc,0x00,0x03,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], s0/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x03,0x40,0x54,0xcc,0x00,0x03,0x02,0x18]
+
+[0x03,0x40,0x54,0xcc,0x81,0x02,0x1e,0x1c] # 1 src0
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], 1/*Invalid immediate*/, v[1:2], v7 ; encoding: [0x03,0x40,0x54,0xcc,0x01,0x02,0x1e,0x1c]
+
+[0x03,0x40,0x54,0xcc,0x00,0x03,0x1d,0x1c] # 1 src1
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, 1/*Invalid immediate*/, v7 ; encoding: [0x03,0x40,0x54,0xcc,0x00,0x03,0x1c,0x1c]
+
+[0x03,0x40,0x54,0xcc,0x00,0x03,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], 1/*Invalid immediate*/ ; encoding: [0x03,0x40,0x54,0xcc,0x00,0x03,0x06,0x18]
+
+[0x03,0x40,0x54,0xcc,0x00,0x03,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_i32_16x16x32_iu8 v[3:6], v0, v[1:2], 1.0/*Invalid immediate*/ ; encoding: [0x03,0x40,0x54,0xcc,0x00,0x03,0x02,0x18]
+
+
+
+[0x02,0x40,0x55,0xcc,0x00,0x03,0x1a,0x1c]
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 ; encoding: [0x02,0x40,0x55,0xcc,0x00,0x03,0x1a,0x1c]
+
+[0x02,0xc0,0x55,0xcc,0x00,0x03,0x1a,0x1c] # clamp
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 clamp ; encoding: [0x02,0xc0,0x55,0xcc,0x00,0x03,0x1a,0x1c]
+
+[0x02,0x48,0x55,0xcc,0x00,0x03,0x1a,0x1c] # op_sel:[1,0,0]
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 index_key:1 ; encoding: [0x02,0x48,0x55,0xcc,0x00,0x03,0x1a,0x1c]
+
+[0x02,0x50,0x55,0xcc,0x00,0x03,0x1a,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x60,0x55,0xcc,0x00,0x03,0x1a,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x55,0xcc,0x00,0x03,0x1a,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x55,0xcc,0x00,0x03,0x1a,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x00,0x55,0xcc,0x00,0x03,0x1a,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x48,0x55,0xcc,0x00,0x03,0x1a,0x1c] # index_key:1
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 index_key:1 ; encoding: [0x02,0x48,0x55,0xcc,0x00,0x03,0x1a,0x1c]
+
+[0x02,0x50,0x55,0xcc,0x00,0x03,0x1a,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x58,0x55,0xcc,0x00,0x03,0x1a,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x55,0xcc,0x00,0x03,0x1a,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 neg_lo:[1,0,0] ; encoding: [0x02,0x40,0x55,0xcc,0x00,0x03,0x1a,0x3c]
+
+[0x02,0x40,0x55,0xcc,0x00,0x03,0x1a,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, v6 neg_lo:[0,1,0] ; encoding: [0x02,0x40,0x55,0xcc,0x00,0x03,0x1a,0x5c]
+
+[0x02,0x40,0x55,0xcc,0x00,0x03,0x1a,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x41,0x55,0xcc,0x00,0x03,0x1a,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x42,0x55,0xcc,0x00,0x03,0x1a,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x44,0x55,0xcc,0x00,0x03,0x1a,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x02,0x40,0x55,0xcc,0x00,0x02,0x1a,0x1c] # sgpr_0 src0
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[2:5], s0/*Invalid register, operand has 'VGPR_32' register class*/, v1, v6 ; encoding: [0x02,0x40,0x55,0xcc,0x00,0x02,0x1a,0x1c]
+
+[0x02,0x40,0x55,0xcc,0x00,0x01,0x18,0x1c] # sgpr_0 src1
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[2:5], v0, s0/*Invalid register, operand has 'VGPR_32' register class*/, v6 ; encoding: [0x02,0x40,0x55,0xcc,0x00,0x01,0x18,0x1c]
+
+[0x02,0x40,0x55,0xcc,0x00,0x03,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, s0/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x02,0x40,0x55,0xcc,0x00,0x03,0x02,0x18]
+
+[0x02,0x40,0x55,0xcc,0x81,0x02,0x1a,0x1c] # 1 src0
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[2:5], 1/*Invalid immediate*/, v1, v6 ; encoding: [0x02,0x40,0x55,0xcc,0x01,0x02,0x1a,0x1c]
+
+[0x02,0x40,0x55,0xcc,0x00,0x03,0x19,0x1c] # 1 src1
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[2:5], v0, 1/*Invalid immediate*/, v6 ; encoding: [0x02,0x40,0x55,0xcc,0x00,0x03,0x18,0x1c]
+
+[0x02,0x40,0x55,0xcc,0x00,0x03,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, 1/*Invalid immediate*/ ; encoding: [0x02,0x40,0x55,0xcc,0x00,0x03,0x06,0x18]
+
+[0x02,0x40,0x55,0xcc,0x00,0x03,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_i32_16x16x32_iu4 v[2:5], v0, v1, 1.0/*Invalid immediate*/ ; encoding: [0x02,0x40,0x55,0xcc,0x00,0x03,0x02,0x18]
+
+
+
+[0x03,0x40,0x56,0xcc,0x00,0x03,0x1e,0x1c]
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 ; encoding: [0x03,0x40,0x56,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0xc0,0x56,0xcc,0x00,0x03,0x1e,0x1c] # clamp
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 clamp ; encoding: [0x03,0xc0,0x56,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x48,0x56,0xcc,0x00,0x03,0x1e,0x1c] # op_sel:[1,0,0]
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 index_key:1 ; encoding: [0x03,0x48,0x56,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x50,0x56,0xcc,0x00,0x03,0x1e,0x1c] # op_sel:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x60,0x56,0xcc,0x00,0x03,0x1e,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x56,0xcc,0x00,0x03,0x1e,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x56,0xcc,0x00,0x03,0x1e,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x00,0x56,0xcc,0x00,0x03,0x1e,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x48,0x56,0xcc,0x00,0x03,0x1e,0x1c] # index_key:1
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 index_key:1 ; encoding: [0x03,0x48,0x56,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x50,0x56,0xcc,0x00,0x03,0x1e,0x1c] # index_key:2
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x58,0x56,0xcc,0x00,0x03,0x1e,0x1c] # index_key:3
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x56,0xcc,0x00,0x03,0x1e,0x3c] # neg_lo:[1,0,0]
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 neg_lo:[1,0,0] ; encoding: [0x03,0x40,0x56,0xcc,0x00,0x03,0x1e,0x3c]
+
+[0x03,0x40,0x56,0xcc,0x00,0x03,0x1e,0x5c] # neg_lo:[0,1,0]
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], v7 neg_lo:[0,1,0] ; encoding: [0x03,0x40,0x56,0xcc,0x00,0x03,0x1e,0x5c]
+
+[0x03,0x40,0x56,0xcc,0x00,0x03,0x1e,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x41,0x56,0xcc,0x00,0x03,0x1e,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x42,0x56,0xcc,0x00,0x03,0x1e,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x44,0x56,0xcc,0x00,0x03,0x1e,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x56,0xcc,0x00,0x02,0x1e,0x1c] # sgpr_0 src0
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[3:6], s0/*Invalid register, operand has 'VGPR_32' register class*/, v[1:2], v7 ; encoding: [0x03,0x40,0x56,0xcc,0x00,0x02,0x1e,0x1c]
+
+[0x03,0x40,0x56,0xcc,0x00,0x01,0x1c,0x1c] # sgpr_0 src1
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[3:6], v0, s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v7 ; encoding: [0x03,0x40,0x56,0xcc,0x00,0x01,0x1c,0x1c]
+
+[0x03,0x40,0x56,0xcc,0x00,0x03,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], s0/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x03,0x40,0x56,0xcc,0x00,0x03,0x02,0x18]
+
+[0x03,0x40,0x56,0xcc,0x81,0x02,0x1e,0x1c] # 1 src0
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[3:6], 1/*Invalid immediate*/, v[1:2], v7 ; encoding: [0x03,0x40,0x56,0xcc,0x01,0x02,0x1e,0x1c]
+
+[0x03,0x40,0x56,0xcc,0x00,0x03,0x1d,0x1c] # 1 src1
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[3:6], v0, 1/*Invalid immediate*/, v7 ; encoding: [0x03,0x40,0x56,0xcc,0x00,0x03,0x1c,0x1c]
+
+[0x03,0x40,0x56,0xcc,0x00,0x03,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], 1/*Invalid immediate*/ ; encoding: [0x03,0x40,0x56,0xcc,0x00,0x03,0x06,0x18]
+
+[0x03,0x40,0x56,0xcc,0x00,0x03,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_i32_16x16x64_iu4 v[3:6], v0, v[1:2], 1.0/*Invalid immediate*/ ; encoding: [0x03,0x40,0x56,0xcc,0x00,0x03,0x02,0x18]
+
+
+
+[0x03,0x40,0x57,0xcc,0x00,0x03,0x1e,0x1c]
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 ; encoding: [0x03,0x40,0x57,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0xc0,0x57,0xcc,0x00,0x03,0x1e,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x48,0x57,0xcc,0x00,0x03,0x1e,0x1c] # op_sel:[1,0,0]
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 index_key:1 ; encoding: [0x03,0x48,0x57,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x50,0x57,0xcc,0x00,0x03,0x1e,0x1c] # op_sel:[0,1,0]
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 index_key:2 ; encoding: [0x03,0x50,0x57,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x60,0x57,0xcc,0x00,0x03,0x1e,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x57,0xcc,0x00,0x03,0x1e,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x57,0xcc,0x00,0x03,0x1e,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x00,0x57,0xcc,0x00,0x03,0x1e,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x48,0x57,0xcc,0x00,0x03,0x1e,0x1c] # index_key:1
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 index_key:1 ; encoding: [0x03,0x48,0x57,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x50,0x57,0xcc,0x00,0x03,0x1e,0x1c] # index_key:2
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 index_key:2 ; encoding: [0x03,0x50,0x57,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x58,0x57,0xcc,0x00,0x03,0x1e,0x1c] # index_key:3
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], v7 index_key:3 ; encoding: [0x03,0x58,0x57,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x40,0x57,0xcc,0x00,0x03,0x1e,0x3c] # neg_lo:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x57,0xcc,0x00,0x03,0x1e,0x5c] # neg_lo:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x57,0xcc,0x00,0x03,0x1e,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x41,0x57,0xcc,0x00,0x03,0x1e,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x42,0x57,0xcc,0x00,0x03,0x1e,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x44,0x57,0xcc,0x00,0x03,0x1e,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x57,0xcc,0x00,0x02,0x1e,0x1c] # sgpr_0 src0
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], s0/*Invalid register, operand has 'VGPR_32' register class*/, v[1:2], v7 ; encoding: [0x03,0x40,0x57,0xcc,0x00,0x02,0x1e,0x1c]
+
+[0x03,0x40,0x57,0xcc,0x00,0x01,0x1c,0x1c] # sgpr_0 src1
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v7 ; encoding: [0x03,0x40,0x57,0xcc,0x00,0x01,0x1c,0x1c]
+
+[0x03,0x40,0x57,0xcc,0x00,0x03,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], s0/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x03,0x40,0x57,0xcc,0x00,0x03,0x02,0x18]
+
+[0x03,0x40,0x57,0xcc,0xf2,0x02,0x1e,0x1c] # 1.0 src0
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], 1.0/*Invalid immediate*/, v[1:2], v7 ; encoding: [0x03,0x40,0x57,0xcc,0x00,0x02,0x1e,0x1c]
+
+[0x03,0x40,0x57,0xcc,0x00,0xe5,0x1d,0x1c] # 1.0 src1
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, 1.0/*Invalid immediate*/, v7 ; encoding: [0x03,0x40,0x57,0xcc,0x00,0x01,0x1c,0x1c]
+
+[0x03,0x40,0x57,0xcc,0x00,0x03,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], 1.0/*Invalid immediate*/ ; encoding: [0x03,0x40,0x57,0xcc,0x00,0x03,0x02,0x18]
+
+[0x03,0x40,0x57,0xcc,0x00,0x03,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_f32_16x16x32_fp8_fp8 v[3:6], v0, v[1:2], 1/*Invalid immediate*/ ; encoding: [0x03,0x40,0x57,0xcc,0x00,0x03,0x06,0x18]
+
+
+
+[0x03,0x40,0x58,0xcc,0x00,0x03,0x1e,0x1c]
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 ; encoding: [0x03,0x40,0x58,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0xc0,0x58,0xcc,0x00,0x03,0x1e,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x48,0x58,0xcc,0x00,0x03,0x1e,0x1c] # op_sel:[1,0,0]
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 index_key:1 ; encoding: [0x03,0x48,0x58,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x50,0x58,0xcc,0x00,0x03,0x1e,0x1c] # op_sel:[0,1,0]
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 index_key:2 ; encoding: [0x03,0x50,0x58,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x60,0x58,0xcc,0x00,0x03,0x1e,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x58,0xcc,0x00,0x03,0x1e,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x58,0xcc,0x00,0x03,0x1e,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x00,0x58,0xcc,0x00,0x03,0x1e,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x48,0x58,0xcc,0x00,0x03,0x1e,0x1c] # index_key:1
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 index_key:1 ; encoding: [0x03,0x48,0x58,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x50,0x58,0xcc,0x00,0x03,0x1e,0x1c] # index_key:2
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 index_key:2 ; encoding: [0x03,0x50,0x58,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x58,0x58,0xcc,0x00,0x03,0x1e,0x1c] # index_key:3
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], v7 index_key:3 ; encoding: [0x03,0x58,0x58,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x40,0x58,0xcc,0x00,0x03,0x1e,0x3c] # neg_lo:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x58,0xcc,0x00,0x03,0x1e,0x5c] # neg_lo:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x58,0xcc,0x00,0x03,0x1e,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x41,0x58,0xcc,0x00,0x03,0x1e,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x42,0x58,0xcc,0x00,0x03,0x1e,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x44,0x58,0xcc,0x00,0x03,0x1e,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x58,0xcc,0x00,0x02,0x1e,0x1c] # sgpr_0 src0
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], s0/*Invalid register, operand has 'VGPR_32' register class*/, v[1:2], v7 ; encoding: [0x03,0x40,0x58,0xcc,0x00,0x02,0x1e,0x1c]
+
+[0x03,0x40,0x58,0xcc,0x00,0x01,0x1c,0x1c] # sgpr_0 src1
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v7 ; encoding: [0x03,0x40,0x58,0xcc,0x00,0x01,0x1c,0x1c]
+
+[0x03,0x40,0x58,0xcc,0x00,0x03,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], s0/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x03,0x40,0x58,0xcc,0x00,0x03,0x02,0x18]
+
+[0x03,0x40,0x58,0xcc,0xf2,0x02,0x1e,0x1c] # 1.0 src0
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], 1.0/*Invalid immediate*/, v[1:2], v7 ; encoding: [0x03,0x40,0x58,0xcc,0x00,0x02,0x1e,0x1c]
+
+[0x03,0x40,0x58,0xcc,0x00,0xe5,0x1d,0x1c] # 1.0 src1
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, 1.0/*Invalid immediate*/, v7 ; encoding: [0x03,0x40,0x58,0xcc,0x00,0x01,0x1c,0x1c]
+
+[0x03,0x40,0x58,0xcc,0x00,0x03,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], 1.0/*Invalid immediate*/ ; encoding: [0x03,0x40,0x58,0xcc,0x00,0x03,0x02,0x18]
+
+[0x03,0x40,0x58,0xcc,0x00,0x03,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_f32_16x16x32_fp8_bf8 v[3:6], v0, v[1:2], 1/*Invalid immediate*/ ; encoding: [0x03,0x40,0x58,0xcc,0x00,0x03,0x06,0x18]
+
+
+
+[0x03,0x40,0x59,0xcc,0x00,0x03,0x1e,0x1c]
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 ; encoding: [0x03,0x40,0x59,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0xc0,0x59,0xcc,0x00,0x03,0x1e,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x48,0x59,0xcc,0x00,0x03,0x1e,0x1c] # op_sel:[1,0,0]
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 index_key:1 ; encoding: [0x03,0x48,0x59,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x50,0x59,0xcc,0x00,0x03,0x1e,0x1c] # op_sel:[0,1,0]
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 index_key:2 ; encoding: [0x03,0x50,0x59,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x60,0x59,0xcc,0x00,0x03,0x1e,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x59,0xcc,0x00,0x03,0x1e,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x59,0xcc,0x00,0x03,0x1e,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x00,0x59,0xcc,0x00,0x03,0x1e,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x48,0x59,0xcc,0x00,0x03,0x1e,0x1c] # index_key:1
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 index_key:1 ; encoding: [0x03,0x48,0x59,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x50,0x59,0xcc,0x00,0x03,0x1e,0x1c] # index_key:2
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 index_key:2 ; encoding: [0x03,0x50,0x59,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x58,0x59,0xcc,0x00,0x03,0x1e,0x1c] # index_key:3
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], v7 index_key:3 ; encoding: [0x03,0x58,0x59,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x40,0x59,0xcc,0x00,0x03,0x1e,0x3c] # neg_lo:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x59,0xcc,0x00,0x03,0x1e,0x5c] # neg_lo:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x59,0xcc,0x00,0x03,0x1e,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x41,0x59,0xcc,0x00,0x03,0x1e,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x42,0x59,0xcc,0x00,0x03,0x1e,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x44,0x59,0xcc,0x00,0x03,0x1e,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x59,0xcc,0x00,0x02,0x1e,0x1c] # sgpr_0 src0
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], s0/*Invalid register, operand has 'VGPR_32' register class*/, v[1:2], v7 ; encoding: [0x03,0x40,0x59,0xcc,0x00,0x02,0x1e,0x1c]
+
+[0x03,0x40,0x59,0xcc,0x00,0x01,0x1c,0x1c] # sgpr_0 src1
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v7 ; encoding: [0x03,0x40,0x59,0xcc,0x00,0x01,0x1c,0x1c]
+
+[0x03,0x40,0x59,0xcc,0x00,0x03,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], s0/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x03,0x40,0x59,0xcc,0x00,0x03,0x02,0x18]
+
+[0x03,0x40,0x59,0xcc,0xf2,0x02,0x1e,0x1c] # 1.0 src0
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], 1.0/*Invalid immediate*/, v[1:2], v7 ; encoding: [0x03,0x40,0x59,0xcc,0x00,0x02,0x1e,0x1c]
+
+[0x03,0x40,0x59,0xcc,0x00,0xe5,0x1d,0x1c] # 1.0 src1
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, 1.0/*Invalid immediate*/, v7 ; encoding: [0x03,0x40,0x59,0xcc,0x00,0x01,0x1c,0x1c]
+
+[0x03,0x40,0x59,0xcc,0x00,0x03,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], 1.0/*Invalid immediate*/ ; encoding: [0x03,0x40,0x59,0xcc,0x00,0x03,0x02,0x18]
+
+[0x03,0x40,0x59,0xcc,0x00,0x03,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_f32_16x16x32_bf8_fp8 v[3:6], v0, v[1:2], 1/*Invalid immediate*/ ; encoding: [0x03,0x40,0x59,0xcc,0x00,0x03,0x06,0x18]
+
+
+
+[0x03,0x40,0x5a,0xcc,0x00,0x03,0x1e,0x1c]
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 ; encoding: [0x03,0x40,0x5a,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0xc0,0x5a,0xcc,0x00,0x03,0x1e,0x1c] # clamp
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x48,0x5a,0xcc,0x00,0x03,0x1e,0x1c] # op_sel:[1,0,0]
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 index_key:1 ; encoding: [0x03,0x48,0x5a,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x50,0x5a,0xcc,0x00,0x03,0x1e,0x1c] # op_sel:[0,1,0]
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 index_key:2 ; encoding: [0x03,0x50,0x5a,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x60,0x5a,0xcc,0x00,0x03,0x1e,0x1c] # op_sel:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x5a,0xcc,0x00,0x03,0x1e,0x14] # op_sel_hi:[0,1,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x5a,0xcc,0x00,0x03,0x1e,0x0c] # op_sel_hi:[1,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x00,0x5a,0xcc,0x00,0x03,0x1e,0x1c] # op_sel_hi:[1,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x48,0x5a,0xcc,0x00,0x03,0x1e,0x1c] # index_key:1
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 index_key:1 ; encoding: [0x03,0x48,0x5a,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x50,0x5a,0xcc,0x00,0x03,0x1e,0x1c] # index_key:2
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 index_key:2 ; encoding: [0x03,0x50,0x5a,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x58,0x5a,0xcc,0x00,0x03,0x1e,0x1c] # index_key:3
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], v7 index_key:3 ; encoding: [0x03,0x58,0x5a,0xcc,0x00,0x03,0x1e,0x1c]
+
+[0x03,0x40,0x5a,0xcc,0x00,0x03,0x1e,0x3c] # neg_lo:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x5a,0xcc,0x00,0x03,0x1e,0x5c] # neg_lo:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x5a,0xcc,0x00,0x03,0x1e,0x9c] # neg_lo:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x41,0x5a,0xcc,0x00,0x03,0x1e,0x1c] # neg_hi:[1,0,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x42,0x5a,0xcc,0x00,0x03,0x1e,0x1c] # neg_hi:[0,1,0]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x44,0x5a,0xcc,0x00,0x03,0x1e,0x1c] # neg_hi:[0,0,1]
+# GFX1170-ERR: warning: invalid instruction encoding
+
+[0x03,0x40,0x5a,0xcc,0x00,0x02,0x1e,0x1c] # sgpr_0 src0
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], s0/*Invalid register, operand has 'VGPR_32' register class*/, v[1:2], v7 ; encoding: [0x03,0x40,0x5a,0xcc,0x00,0x02,0x1e,0x1c]
+
+[0x03,0x40,0x5a,0xcc,0x00,0x01,0x1c,0x1c] # sgpr_0 src1
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, s[0:1]/*Invalid register, operand has 'VReg_64' register class*/, v7 ; encoding: [0x03,0x40,0x5a,0xcc,0x00,0x01,0x1c,0x1c]
+
+[0x03,0x40,0x5a,0xcc,0x00,0x03,0x02,0x18] # sgpr_0 src2
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], s0/*Invalid register, operand has 'VGPR_32' register class*/ ; encoding: [0x03,0x40,0x5a,0xcc,0x00,0x03,0x02,0x18]
+
+[0x03,0x40,0x5a,0xcc,0xf2,0x02,0x1e,0x1c] # 1.0 src0
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], 1.0/*Invalid immediate*/, v[1:2], v7 ; encoding: [0x03,0x40,0x5a,0xcc,0x00,0x02,0x1e,0x1c]
+
+[0x03,0x40,0x5a,0xcc,0x00,0xe5,0x1d,0x1c] # 1.0 src1
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, 1.0/*Invalid immediate*/, v7 ; encoding: [0x03,0x40,0x5a,0xcc,0x00,0x01,0x1c,0x1c]
+
+[0x03,0x40,0x5a,0xcc,0x00,0x03,0xca,0x1b] # 1.0 src2
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], 1.0/*Invalid immediate*/ ; encoding: [0x03,0x40,0x5a,0xcc,0x00,0x03,0x02,0x18]
+
+[0x03,0x40,0x5a,0xcc,0x00,0x03,0x06,0x1a] # 1 src2
+# GFX1170: v_swmmac_f32_16x16x32_bf8_bf8 v[3:6], v0, v[1:2], 1/*Invalid immediate*/ ; encoding: [0x03,0x40,0x5a,0xcc,0x00,0x03,0x06,0x18]