[llvm] [AMDGPU] Allow bank conflicts on src0 for V_DUAL_MOV_B32 for gfx1170 (PR #186100)

Mirko Brkušanin via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 12 04:47:16 PDT 2026


https://github.com/mbrkusanin created https://github.com/llvm/llvm-project/pull/186100

None

>From 78f075d11ead1c46f82762b5443732ef23304f02 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Thu, 12 Mar 2026 12:43:56 +0100
Subject: [PATCH] [AMDGPU] Allow bank conflicts on src0 for V_DUAL_MOV_B32 for
 gfx1170

---
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      |   3 +-
 llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp       |   4 +-
 llvm/test/CodeGen/AMDGPU/vopd-combine.mir     | 350 +++++++++----
 .../test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll | 486 ++++++------------
 llvm/test/MC/AMDGPU/gfx1170_asm_features.s    |   8 +
 5 files changed, 397 insertions(+), 454 deletions(-)
 create mode 100644 llvm/test/MC/AMDGPU/gfx1170_asm_features.s

diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index bbf1e2be86950..34c9ba58fd6b7 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -3967,9 +3967,10 @@ AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
                : MCRegister();
   };
 
-  // On GFX12+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
+  // On GFX1170+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
   // source-cache.
   bool SkipSrc =
+      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1170 ||
       Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
       Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
       Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx13 ||
diff --git a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
index 800d5bfa2314f..b17cabf37d53f 100644
--- a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
@@ -142,9 +142,9 @@ bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
   if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
     return false;
 
-  // On GFX12+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
+  // On GFX1170+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
   // source-cache.
-  bool SkipSrc = ST.getGeneration() >= AMDGPUSubtarget::GFX12 &&
+  bool SkipSrc = (ST.hasGFX11_7Insts() || ST.hasGFX12Insts()) &&
                  MIX.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
                  MIY.getOpcode() == AMDGPU::V_MOV_B32_e32;
   bool AllowSameVGPR = ST.hasGFX1250Insts();
diff --git a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir
index 3a2b0996edacf..b5c25f862ba29 100644
--- a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir
@@ -1,6 +1,8 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass=postmisched %s -o - | FileCheck -check-prefix=SCHED %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass=postmisched,gcn-create-vopd %s -o - | FileCheck -check-prefixes=PAIR,PAIR-GFX11 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass=postmisched,gcn-create-vopd %s -o - | FileCheck -check-prefixes=PAIR,PAIR-GFX1100 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -verify-machineinstrs -run-pass=postmisched %s -o - | FileCheck -check-prefix=SCHED %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1170 -verify-machineinstrs -run-pass=postmisched,gcn-create-vopd %s -o - | FileCheck -check-prefixes=PAIR,PAIR-GFX1170 %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=postmisched %s -o - | FileCheck -check-prefix=SCHED %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=postmisched,gcn-create-vopd %s -o - | FileCheck -check-prefixes=PAIR,PAIR-GFX12 %s
 
@@ -43,12 +45,19 @@ body:             |
     ; SCHED-NEXT: $vgpr6 = V_MUL_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec
     ; SCHED-NEXT: $vgpr4 = V_SUB_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
     ;
-    ; PAIR-GFX11-LABEL: name: vopd_schedule
-    ; PAIR-GFX11: $vgpr0 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx11 $vgpr1, $vgpr1, killed $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr4 = V_SUB_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-LABEL: name: vopd_schedule
+    ; PAIR-GFX1100: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx11 $vgpr1, $vgpr1, killed $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr4 = V_SUB_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ;
+    ; PAIR-GFX1170-LABEL: name: vopd_schedule
+    ; PAIR-GFX1170: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1170 $vgpr1, $vgpr1, killed $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr4 = V_SUB_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
     ;
     ; PAIR-GFX12-LABEL: name: vopd_schedule
     ; PAIR-GFX12: $vgpr0 = IMPLICIT_DEF
@@ -80,12 +89,19 @@ body:             |
     ; SCHED-NEXT: $vgpr5 = V_FMAMK_F32 killed $vgpr0, 10, killed $vgpr3, implicit $mode, implicit $exec
     ; SCHED-NEXT: $vgpr2 = V_FMAC_F32_e32 killed $vgpr1, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec
     ;
-    ; PAIR-GFX11-LABEL: name: vopd_fmamk
-    ; PAIR-GFX11: $vgpr2 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr0 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr3 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx11 killed $vgpr0, 10, killed $vgpr3, killed $vgpr1, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-LABEL: name: vopd_fmamk
+    ; PAIR-GFX1100: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx11 killed $vgpr0, 10, killed $vgpr3, killed $vgpr1, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ;
+    ; PAIR-GFX1170-LABEL: name: vopd_fmamk
+    ; PAIR-GFX1170: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1170 killed $vgpr0, 10, killed $vgpr3, killed $vgpr1, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
     ;
     ; PAIR-GFX12-LABEL: name: vopd_fmamk
     ; PAIR-GFX12: $vgpr2 = IMPLICIT_DEF
@@ -155,19 +171,33 @@ body:             |
     ; SCHED-NEXT: $vgpr6 = V_ADD_F32_e32 $sgpr20, $vgpr3, implicit $mode, implicit $exec
     ; SCHED-NEXT: $vgpr9 = V_CNDMASK_B32_e32 killed $sgpr20, killed $vgpr3, implicit $mode, implicit $exec, implicit killed $vcc_lo
     ;
-    ; PAIR-GFX11-LABEL: name: vopd_cndmask
-    ; PAIR-GFX11: liveins: $vcc_lo
-    ; PAIR-GFX11-NEXT: {{  $}}
-    ; PAIR-GFX11-NEXT: $vgpr2 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr0 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr3 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $sgpr20 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr4 = V_FMAMK_F32 $sgpr20, 12345, $vgpr3, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr2, $vgpr5 = V_DUAL_FMAC_F32_e32_X_CNDMASK_B32_e32_gfx11 $sgpr20, killed $vgpr1, killed $vgpr2, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo
-    ; PAIR-GFX11-NEXT: $vgpr7 = V_CNDMASK_B32_e32 killed $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo
-    ; PAIR-GFX11-NEXT: $vgpr6 = V_ADD_F32_e32 $sgpr20, $vgpr3, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr9 = V_CNDMASK_B32_e32 killed $sgpr20, killed $vgpr3, implicit $mode, implicit $exec, implicit killed $vcc_lo
+    ; PAIR-GFX1100-LABEL: name: vopd_cndmask
+    ; PAIR-GFX1100: liveins: $vcc_lo
+    ; PAIR-GFX1100-NEXT: {{  $}}
+    ; PAIR-GFX1100-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $sgpr20 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr4 = V_FMAMK_F32 $sgpr20, 12345, $vgpr3, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr2, $vgpr5 = V_DUAL_FMAC_F32_e32_X_CNDMASK_B32_e32_gfx11 $sgpr20, killed $vgpr1, killed $vgpr2, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo
+    ; PAIR-GFX1100-NEXT: $vgpr7 = V_CNDMASK_B32_e32 killed $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo
+    ; PAIR-GFX1100-NEXT: $vgpr6 = V_ADD_F32_e32 $sgpr20, $vgpr3, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr9 = V_CNDMASK_B32_e32 killed $sgpr20, killed $vgpr3, implicit $mode, implicit $exec, implicit killed $vcc_lo
+    ;
+    ; PAIR-GFX1170-LABEL: name: vopd_cndmask
+    ; PAIR-GFX1170: liveins: $vcc_lo
+    ; PAIR-GFX1170-NEXT: {{  $}}
+    ; PAIR-GFX1170-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $sgpr20 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr4 = V_FMAMK_F32 $sgpr20, 12345, $vgpr3, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr2, $vgpr5 = V_DUAL_FMAC_F32_e32_X_CNDMASK_B32_e32_gfx1170 $sgpr20, killed $vgpr1, killed $vgpr2, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo
+    ; PAIR-GFX1170-NEXT: $vgpr7 = V_CNDMASK_B32_e32 killed $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo
+    ; PAIR-GFX1170-NEXT: $vgpr6 = V_ADD_F32_e32 $sgpr20, $vgpr3, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr9 = V_CNDMASK_B32_e32 killed $sgpr20, killed $vgpr3, implicit $mode, implicit $exec, implicit killed $vcc_lo
     ;
     ; PAIR-GFX12-LABEL: name: vopd_cndmask
     ; PAIR-GFX12: liveins: $vcc_lo
@@ -211,10 +241,15 @@ body:             |
     ; SCHED-NEXT: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
     ; SCHED-NEXT: $vgpr3 = V_ADD_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
     ;
-    ; PAIR-GFX11-LABEL: name: vopd_mov
-    ; PAIR-GFX11: $vgpr0 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx11 killed $vgpr0, killed $vgpr1, $vgpr1, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-LABEL: name: vopd_mov
+    ; PAIR-GFX1100: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx11 killed $vgpr0, killed $vgpr1, $vgpr1, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ;
+    ; PAIR-GFX1170-LABEL: name: vopd_mov
+    ; PAIR-GFX1170: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx1170 killed $vgpr0, killed $vgpr1, $vgpr1, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
     ;
     ; PAIR-GFX12-LABEL: name: vopd_mov
     ; PAIR-GFX12: $vgpr0 = IMPLICIT_DEF
@@ -239,10 +274,15 @@ body:             |
     ; SCHED-NEXT: $vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec
     ; SCHED-NEXT: $vgpr3 = V_MOV_B32_e32 killed $sgpr7, implicit $exec
     ;
-    ; PAIR-GFX11-LABEL: name: vopd_mov_mov
-    ; PAIR-GFX11: $sgpr0 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $sgpr7 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx11 killed $sgpr0, killed $sgpr7, implicit $exec, implicit $exec, implicit $exec
+    ; PAIR-GFX1100-LABEL: name: vopd_mov_mov
+    ; PAIR-GFX1100: $sgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $sgpr7 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx11 killed $sgpr0, killed $sgpr7, implicit $exec, implicit $exec, implicit $exec
+    ;
+    ; PAIR-GFX1170-LABEL: name: vopd_mov_mov
+    ; PAIR-GFX1170: $sgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $sgpr7 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1170 killed $sgpr0, killed $sgpr7, implicit $exec, implicit $exec, implicit $exec
     ;
     ; PAIR-GFX12-LABEL: name: vopd_mov_mov
     ; PAIR-GFX12: $sgpr0 = IMPLICIT_DEF
@@ -300,12 +340,19 @@ body:             |
     ; SCHED-NEXT: $vgpr5 = V_FMAMK_F32 killed $vgpr0, 100, killed $vgpr3, implicit $mode, implicit $exec
     ; SCHED-NEXT: $vgpr2 = V_FMAC_F32_e32 4, killed $vgpr1, killed $vgpr2, implicit $mode, implicit $exec
     ;
-    ; PAIR-GFX11-LABEL: name: vopd_constants_inlinable
-    ; PAIR-GFX11: $vgpr2 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr0 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr3 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx11 killed $vgpr0, 100, killed $vgpr3, 4, killed $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-LABEL: name: vopd_constants_inlinable
+    ; PAIR-GFX1100: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx11 killed $vgpr0, 100, killed $vgpr3, 4, killed $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ;
+    ; PAIR-GFX1170-LABEL: name: vopd_constants_inlinable
+    ; PAIR-GFX1170: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1170 killed $vgpr0, 100, killed $vgpr3, 4, killed $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
     ;
     ; PAIR-GFX12-LABEL: name: vopd_constants_inlinable
     ; PAIR-GFX12: $vgpr2 = IMPLICIT_DEF
@@ -338,12 +385,19 @@ body:             |
     ; SCHED-NEXT: $vgpr5 = V_FMAMK_F32 killed $vgpr0, 100, killed $vgpr3, implicit $mode, implicit $exec
     ; SCHED-NEXT: $vgpr2 = V_FMAC_F32_e32 100, killed $vgpr1, killed $vgpr2, implicit $mode, implicit $exec
     ;
-    ; PAIR-GFX11-LABEL: name: vopd_constants_same
-    ; PAIR-GFX11: $vgpr2 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr0 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr3 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx11 killed $vgpr0, 100, killed $vgpr3, 100, killed $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-LABEL: name: vopd_constants_same
+    ; PAIR-GFX1100: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx11 killed $vgpr0, 100, killed $vgpr3, 100, killed $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ;
+    ; PAIR-GFX1170-LABEL: name: vopd_constants_same
+    ; PAIR-GFX1170: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1170 killed $vgpr0, 100, killed $vgpr3, 100, killed $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
     ;
     ; PAIR-GFX12-LABEL: name: vopd_constants_same
     ; PAIR-GFX12: $vgpr2 = IMPLICIT_DEF
@@ -373,10 +427,15 @@ body:             |
     ; SCHED-NEXT: $vgpr1 = V_MOV_B32_e32 981467136, implicit $exec
     ; SCHED-NEXT: $vgpr2 = V_FMAAK_F32 killed $sgpr0, killed $vgpr0, 981467136, implicit $mode, implicit $exec
     ;
-    ; PAIR-GFX11-LABEL: name: vopd_mov_fmaak_constants_same
-    ; PAIR-GFX11: $vgpr0 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $sgpr0 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr1, $vgpr2 = V_DUAL_MOV_B32_e32_X_FMAAK_F32_gfx11 981467136, killed $sgpr0, killed $vgpr0, 981467136, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-LABEL: name: vopd_mov_fmaak_constants_same
+    ; PAIR-GFX1100: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr1, $vgpr2 = V_DUAL_MOV_B32_e32_X_FMAAK_F32_gfx11 981467136, killed $sgpr0, killed $vgpr0, 981467136, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ;
+    ; PAIR-GFX1170-LABEL: name: vopd_mov_fmaak_constants_same
+    ; PAIR-GFX1170: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr1, $vgpr2 = V_DUAL_MOV_B32_e32_X_FMAAK_F32_gfx1170 981467136, killed $sgpr0, killed $vgpr0, 981467136, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
     ;
     ; PAIR-GFX12-LABEL: name: vopd_mov_fmaak_constants_same
     ; PAIR-GFX12: $vgpr0 = IMPLICIT_DEF
@@ -403,11 +462,17 @@ body:             |
     ; SCHED-NEXT: DBG_VALUE $vgpr0, 0, 0
     ; SCHED-NEXT: $vgpr6 = V_MUL_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec
     ;
-    ; PAIR-GFX11-LABEL: name: vopd_debug
-    ; PAIR-GFX11: $vgpr0 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx11 killed $vgpr1, $vgpr1, killed $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: DBG_VALUE $vgpr0, 0, 0
+    ; PAIR-GFX1100-LABEL: name: vopd_debug
+    ; PAIR-GFX1100: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx11 killed $vgpr1, $vgpr1, killed $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: DBG_VALUE $vgpr0, 0, 0
+    ;
+    ; PAIR-GFX1170-LABEL: name: vopd_debug
+    ; PAIR-GFX1170: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1170 killed $vgpr1, $vgpr1, killed $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: DBG_VALUE $vgpr0, 0, 0
     ;
     ; PAIR-GFX12-LABEL: name: vopd_debug
     ; PAIR-GFX12: $vgpr0 = IMPLICIT_DEF
@@ -451,23 +516,41 @@ body:             |
     ; SCHED-NEXT: $vgpr16 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
     ; SCHED-NEXT: $vgpr14 = V_SUB_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
     ;
-    ; PAIR-GFX11-LABEL: name: vopd_schedule_unconstrained
-    ; PAIR-GFX11: liveins: $vcc_lo
-    ; PAIR-GFX11-NEXT: {{  $}}
-    ; PAIR-GFX11-NEXT: $vgpr2 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr3 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr0 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr4 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr2 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr12, $vgpr11 = V_DUAL_ADD_F32_e32_X_CNDMASK_B32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo
-    ; PAIR-GFX11-NEXT: $vgpr19 = V_CNDMASK_B32_e32 $vgpr0, killed $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo
-    ; PAIR-GFX11-NEXT: $vgpr17, $vgpr10 = V_DUAL_MUL_F32_e32_X_CNDMASK_B32_e32_gfx11 killed $vgpr0, $vgpr0, $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo
-    ; PAIR-GFX11-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit killed $vcc_lo
-    ; PAIR-GFX11-NEXT: $vgpr16 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr14 = V_SUB_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-LABEL: name: vopd_schedule_unconstrained
+    ; PAIR-GFX1100: liveins: $vcc_lo
+    ; PAIR-GFX1100-NEXT: {{  $}}
+    ; PAIR-GFX1100-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr4 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr2 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr12, $vgpr11 = V_DUAL_ADD_F32_e32_X_CNDMASK_B32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo
+    ; PAIR-GFX1100-NEXT: $vgpr19 = V_CNDMASK_B32_e32 $vgpr0, killed $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo
+    ; PAIR-GFX1100-NEXT: $vgpr17, $vgpr10 = V_DUAL_MUL_F32_e32_X_CNDMASK_B32_e32_gfx11 killed $vgpr0, $vgpr0, $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo
+    ; PAIR-GFX1100-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit killed $vcc_lo
+    ; PAIR-GFX1100-NEXT: $vgpr16 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr14 = V_SUB_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ;
+    ; PAIR-GFX1170-LABEL: name: vopd_schedule_unconstrained
+    ; PAIR-GFX1170: liveins: $vcc_lo
+    ; PAIR-GFX1170-NEXT: {{  $}}
+    ; PAIR-GFX1170-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr4 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1170 $vgpr1, $vgpr1, $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr2 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr12, $vgpr11 = V_DUAL_ADD_F32_e32_X_CNDMASK_B32_e32_gfx1170 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo
+    ; PAIR-GFX1170-NEXT: $vgpr19 = V_CNDMASK_B32_e32 $vgpr0, killed $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo
+    ; PAIR-GFX1170-NEXT: $vgpr17, $vgpr10 = V_DUAL_MUL_F32_e32_X_CNDMASK_B32_e32_gfx1170 killed $vgpr0, $vgpr0, $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo
+    ; PAIR-GFX1170-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit killed $vcc_lo
+    ; PAIR-GFX1170-NEXT: $vgpr16 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr14 = V_SUB_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
     ;
     ; PAIR-GFX12-LABEL: name: vopd_schedule_unconstrained
     ; PAIR-GFX12: liveins: $vcc_lo
@@ -551,32 +634,59 @@ body:             |
     ; SCHED-NEXT: $vgpr34 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
     ; SCHED-NEXT: $vgpr32 = V_SUB_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
     ;
-    ; PAIR-GFX11-LABEL: name: vopd_schedule_unconstrained_2
-    ; PAIR-GFX11: liveins: $vcc_lo
-    ; PAIR-GFX11-NEXT: {{  $}}
-    ; PAIR-GFX11-NEXT: $vgpr2 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr3 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr0 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr20 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr16, $vgpr35 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr2 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr4, $vgpr29 = V_DUAL_SUB_F32_e32_X_CNDMASK_B32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo
-    ; PAIR-GFX11-NEXT: $vgpr19, $vgpr20 = V_DUAL_CNDMASK_B32_e32_X_FMAC_F32_e32_gfx11 $vgpr0, $vgpr3, 10, $vgpr1, killed $vgpr20, implicit $vcc_lo, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo
-    ; PAIR-GFX11-NEXT: $vgpr10, $vgpr17 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32_gfx11 $vgpr1, $vgpr2, $vgpr0, $vgpr0, implicit $vcc_lo, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr11, $vgpr12 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_gfx11 $vgpr0, $vgpr3, $vgpr1, $vgpr1, implicit $vcc_lo, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr37, $vgpr14 = V_DUAL_CNDMASK_B32_e32_X_SUB_F32_e32_gfx11 $vgpr0, killed $vgpr3, $vgpr1, $vgpr1, implicit $vcc_lo, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr20 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr21, $vgpr24 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx11 $vgpr1, $vgpr1, killed $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr28 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo
-    ; PAIR-GFX11-NEXT: $vgpr22 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr31 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr33 = V_CNDMASK_B32_e32 $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit killed $vcc_lo
-    ; PAIR-GFX11-NEXT: $vgpr34 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr32 = V_SUB_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-LABEL: name: vopd_schedule_unconstrained_2
+    ; PAIR-GFX1100: liveins: $vcc_lo
+    ; PAIR-GFX1100-NEXT: {{  $}}
+    ; PAIR-GFX1100-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr20 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr16, $vgpr35 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr2 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr4, $vgpr29 = V_DUAL_SUB_F32_e32_X_CNDMASK_B32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo
+    ; PAIR-GFX1100-NEXT: $vgpr19, $vgpr20 = V_DUAL_CNDMASK_B32_e32_X_FMAC_F32_e32_gfx11 $vgpr0, $vgpr3, 10, $vgpr1, killed $vgpr20, implicit $vcc_lo, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo
+    ; PAIR-GFX1100-NEXT: $vgpr10, $vgpr17 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32_gfx11 $vgpr1, $vgpr2, $vgpr0, $vgpr0, implicit $vcc_lo, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr11, $vgpr12 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_gfx11 $vgpr0, $vgpr3, $vgpr1, $vgpr1, implicit $vcc_lo, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr37, $vgpr14 = V_DUAL_CNDMASK_B32_e32_X_SUB_F32_e32_gfx11 $vgpr0, killed $vgpr3, $vgpr1, $vgpr1, implicit $vcc_lo, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr20 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr21, $vgpr24 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx11 $vgpr1, $vgpr1, killed $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr28 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo
+    ; PAIR-GFX1100-NEXT: $vgpr22 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr31 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr33 = V_CNDMASK_B32_e32 $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit killed $vcc_lo
+    ; PAIR-GFX1100-NEXT: $vgpr34 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr32 = V_SUB_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ;
+    ; PAIR-GFX1170-LABEL: name: vopd_schedule_unconstrained_2
+    ; PAIR-GFX1170: liveins: $vcc_lo
+    ; PAIR-GFX1170-NEXT: {{  $}}
+    ; PAIR-GFX1170-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr20 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr16, $vgpr35 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1170 $vgpr1, $vgpr1, $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1170 $vgpr1, $vgpr1, $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr2 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr4, $vgpr29 = V_DUAL_SUB_F32_e32_X_CNDMASK_B32_e32_gfx1170 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo
+    ; PAIR-GFX1170-NEXT: $vgpr19, $vgpr20 = V_DUAL_CNDMASK_B32_e32_X_FMAC_F32_e32_gfx1170 $vgpr0, $vgpr3, 10, $vgpr1, killed $vgpr20, implicit $vcc_lo, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo
+    ; PAIR-GFX1170-NEXT: $vgpr10, $vgpr17 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32_gfx1170 $vgpr1, $vgpr2, $vgpr0, $vgpr0, implicit $vcc_lo, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr11, $vgpr12 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_gfx1170 $vgpr0, $vgpr3, $vgpr1, $vgpr1, implicit $vcc_lo, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr37, $vgpr14 = V_DUAL_CNDMASK_B32_e32_X_SUB_F32_e32_gfx1170 $vgpr0, killed $vgpr3, $vgpr1, $vgpr1, implicit $vcc_lo, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr20 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr21, $vgpr24 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1170 $vgpr1, $vgpr1, killed $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr28 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo
+    ; PAIR-GFX1170-NEXT: $vgpr22 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr31 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr33 = V_CNDMASK_B32_e32 $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit killed $vcc_lo
+    ; PAIR-GFX1170-NEXT: $vgpr34 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr32 = V_SUB_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
     ;
     ; PAIR-GFX12-LABEL: name: vopd_schedule_unconstrained_2
     ; PAIR-GFX12: liveins: $vcc_lo
@@ -657,11 +767,17 @@ body: |
     ; SCHED-NEXT: $vgpr4 = V_MOV_B32_e32 target-flags(amdgpu-abs32-lo) @lds, implicit $exec
     ; SCHED-NEXT: $vgpr5 = V_MOV_B32_e32 target-flags(amdgpu-abs32-lo) @lds, implicit $exec
     ;
-    ; PAIR-GFX11-LABEL: name: vopd_mov_fixup
-    ; PAIR-GFX11: $vgpr0 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx11 target-flags(amdgpu-abs32-lo) @lds, killed $vgpr0, killed $vgpr1, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr4, $vgpr5 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx11 target-flags(amdgpu-abs32-lo) @lds, target-flags(amdgpu-abs32-lo) @lds, implicit $exec, implicit $exec, implicit $exec
+    ; PAIR-GFX1100-LABEL: name: vopd_mov_fixup
+    ; PAIR-GFX1100: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx11 target-flags(amdgpu-abs32-lo) @lds, killed $vgpr0, killed $vgpr1, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr4, $vgpr5 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx11 target-flags(amdgpu-abs32-lo) @lds, target-flags(amdgpu-abs32-lo) @lds, implicit $exec, implicit $exec, implicit $exec
+    ;
+    ; PAIR-GFX1170-LABEL: name: vopd_mov_fixup
+    ; PAIR-GFX1170: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx1170 target-flags(amdgpu-abs32-lo) @lds, killed $vgpr0, killed $vgpr1, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-GFX1170-NEXT: $vgpr4, $vgpr5 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1170 target-flags(amdgpu-abs32-lo) @lds, target-flags(amdgpu-abs32-lo) @lds, implicit $exec, implicit $exec, implicit $exec
     ;
     ; PAIR-GFX12-LABEL: name: vopd_mov_fixup
     ; PAIR-GFX12: $vgpr0 = IMPLICIT_DEF
@@ -726,11 +842,16 @@ body:             |
     ; SCHED-NEXT: $vgpr2 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
     ; SCHED-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr5, implicit $exec
     ;
-    ; PAIR-GFX11-LABEL: name: vopd_mov_mov_same_src_bank
-    ; PAIR-GFX11: $vgpr1 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr5 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr2 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
-    ; PAIR-GFX11-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr5, implicit $exec
+    ; PAIR-GFX1100-LABEL: name: vopd_mov_mov_same_src_bank
+    ; PAIR-GFX1100: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr2 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+    ; PAIR-GFX1100-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr5, implicit $exec
+    ;
+    ; PAIR-GFX1170-LABEL: name: vopd_mov_mov_same_src_bank
+    ; PAIR-GFX1170: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1170 killed $vgpr1, killed $vgpr5, implicit $exec, implicit $exec, implicit $exec
     ;
     ; PAIR-GFX12-LABEL: name: vopd_mov_mov_same_src_bank
     ; PAIR-GFX12: $vgpr1 = IMPLICIT_DEF
@@ -754,10 +875,15 @@ body:             |
     ; SCHED-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
     ; SCHED-NEXT: $vgpr1 = V_ADD_F32_e32 killed $vgpr3, $vgpr3, implicit $mode, implicit $exec
     ;
-    ; PAIR-GFX11-LABEL: name: vopd_combine_opy_overwrites_opx
-    ; PAIR-GFX11: $vgpr1 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr3 = IMPLICIT_DEF
-    ; PAIR-GFX11-NEXT: $vgpr0, $vgpr1 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx11 killed $vgpr1, killed $vgpr3, $vgpr3, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-GFX1100-LABEL: name: vopd_combine_opy_overwrites_opx
+    ; PAIR-GFX1100: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-GFX1100-NEXT: $vgpr0, $vgpr1 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx11 killed $vgpr1, killed $vgpr3, $vgpr3, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ;
+    ; PAIR-GFX1170-LABEL: name: vopd_combine_opy_overwrites_opx
+    ; PAIR-GFX1170: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-GFX1170-NEXT: $vgpr0, $vgpr1 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx1170 killed $vgpr1, killed $vgpr3, $vgpr3, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
     ;
     ; PAIR-GFX12-LABEL: name: vopd_combine_opy_overwrites_opx
     ; PAIR-GFX12: $vgpr1 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll
index 2558dc3903640..7148f3d614650 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll
@@ -17,37 +17,19 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16_imm_non_inlineable(<8 x half> %A, <8 x half> %B, ptr addrspace(1) %out) {
-; GFX1170-LABEL: test_wmma_f32_16x16x16_f16_imm_non_inlineable:
-; GFX1170:       ; %bb.0: ; %bb
-; GFX1170-NEXT:    v_mov_b32_e32 v10, 0x40400000
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_mov_b32_e32 v11, v10
-; GFX1170-NEXT:    v_mov_b32_e32 v12, v10
-; GFX1170-NEXT:    v_mov_b32_e32 v13, v10
-; GFX1170-NEXT:    v_mov_b32_e32 v14, v10
-; GFX1170-NEXT:    v_mov_b32_e32 v15, v10
-; GFX1170-NEXT:    v_mov_b32_e32 v16, v10
-; GFX1170-NEXT:    v_mov_b32_e32 v17, v10
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_wmma_f32_16x16x16_f16 v[10:17], v[0:3], v[4:7], v[10:17]
-; GFX1170-NEXT:    s_clause 0x1
-; GFX1170-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
-; GFX1170-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX1170-NEXT:    s_endpgm
-;
-; GFX12-LABEL: test_wmma_f32_16x16x16_f16_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v10, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v12, v10
-; GFX12-NEXT:    v_dual_mov_b32 v13, v10 :: v_dual_mov_b32 v14, v10
-; GFX12-NEXT:    v_dual_mov_b32 v15, v10 :: v_dual_mov_b32 v16, v10
-; GFX12-NEXT:    v_mov_b32_e32 v17, v10
-; GFX12-NEXT:    v_wmma_f32_16x16x16_f16 v[10:17], v[0:3], v[4:7], v[10:17]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
-; GFX12-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_f16_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v10, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v12, v10
+; GCN-NEXT:    v_dual_mov_b32 v13, v10 :: v_dual_mov_b32 v14, v10
+; GCN-NEXT:    v_dual_mov_b32 v15, v10 :: v_dual_mov_b32 v16, v10
+; GCN-NEXT:    v_mov_b32_e32 v17, v10
+; GCN-NEXT:    v_wmma_f32_16x16x16_f16 v[10:17], v[0:3], v[4:7], v[10:17]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
+; GCN-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half> %A, <8 x half> %B, <8 x float> <float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -69,37 +51,19 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf16_imm_non_inlineable(<8 x i16> %A, <8 x i16> %B, ptr addrspace(1) %out) {
-; GFX1170-LABEL: test_wmma_f32_16x16x16_bf16_imm_non_inlineable:
-; GFX1170:       ; %bb.0: ; %bb
-; GFX1170-NEXT:    v_mov_b32_e32 v10, 0x40400000
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_mov_b32_e32 v11, v10
-; GFX1170-NEXT:    v_mov_b32_e32 v12, v10
-; GFX1170-NEXT:    v_mov_b32_e32 v13, v10
-; GFX1170-NEXT:    v_mov_b32_e32 v14, v10
-; GFX1170-NEXT:    v_mov_b32_e32 v15, v10
-; GFX1170-NEXT:    v_mov_b32_e32 v16, v10
-; GFX1170-NEXT:    v_mov_b32_e32 v17, v10
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_wmma_f32_16x16x16_bf16 v[10:17], v[0:3], v[4:7], v[10:17]
-; GFX1170-NEXT:    s_clause 0x1
-; GFX1170-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
-; GFX1170-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX1170-NEXT:    s_endpgm
-;
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf16_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v10, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v12, v10
-; GFX12-NEXT:    v_dual_mov_b32 v13, v10 :: v_dual_mov_b32 v14, v10
-; GFX12-NEXT:    v_dual_mov_b32 v15, v10 :: v_dual_mov_b32 v16, v10
-; GFX12-NEXT:    v_mov_b32_e32 v17, v10
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf16 v[10:17], v[0:3], v[4:7], v[10:17]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
-; GFX12-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf16_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v10, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v12, v10
+; GCN-NEXT:    v_dual_mov_b32 v13, v10 :: v_dual_mov_b32 v14, v10
+; GCN-NEXT:    v_dual_mov_b32 v15, v10 :: v_dual_mov_b32 v16, v10
+; GCN-NEXT:    v_mov_b32_e32 v17, v10
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf16 v[10:17], v[0:3], v[4:7], v[10:17]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[8:9], v[14:17], off offset:16
+; GCN-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16(<8 x i16> %A, <8 x i16> %B, <8 x float> <float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -119,26 +83,15 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f16_16x16x16_f16_imm_non_inlineable(<8 x half> %A, <8 x half> %B, ptr addrspace(1) %out) {
-; GFX1170-LABEL: test_wmma_f16_16x16x16_f16_imm_non_inlineable:
-; GFX1170:       ; %bb.0: ; %bb
-; GFX1170-NEXT:    v_mov_b32_e32 v10, 0x42004200
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX1170-NEXT:    v_mov_b32_e32 v11, v10
-; GFX1170-NEXT:    v_mov_b32_e32 v12, v10
-; GFX1170-NEXT:    v_mov_b32_e32 v13, v10
-; GFX1170-NEXT:    v_wmma_f16_16x16x16_f16 v[10:13], v[0:3], v[4:7], v[10:13]
-; GFX1170-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX1170-NEXT:    s_endpgm
-;
-; GFX12-LABEL: test_wmma_f16_16x16x16_f16_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v10, 0x42004200
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v12, v10
-; GFX12-NEXT:    v_mov_b32_e32 v13, v10
-; GFX12-NEXT:    v_wmma_f16_16x16x16_f16 v[10:13], v[0:3], v[4:7], v[10:13]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f16_16x16x16_f16_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v10, 0x42004200
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v12, v10
+; GCN-NEXT:    v_mov_b32_e32 v13, v10
+; GCN-NEXT:    v_wmma_f16_16x16x16_f16 v[10:13], v[0:3], v[4:7], v[10:13]
+; GCN-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> <half 3.0, half 3.0, half 3.0, half 3.0, half 3.0, half 3.0, half 3.0, half 3.0>, i1 0)
   store <8 x half> %res, ptr addrspace(1) %out
@@ -146,26 +99,15 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_imm(<8 x i16> %A, <8 x i16> %B, ptr addrspace(1) %out) {
-; GFX1170-LABEL: test_wmma_bf16_16x16x16_bf16_imm:
-; GFX1170:       ; %bb.0: ; %bb
-; GFX1170-NEXT:    v_mov_b32_e32 v10, 0x3f803f80
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX1170-NEXT:    v_mov_b32_e32 v11, v10
-; GFX1170-NEXT:    v_mov_b32_e32 v12, v10
-; GFX1170-NEXT:    v_mov_b32_e32 v13, v10
-; GFX1170-NEXT:    v_wmma_bf16_16x16x16_bf16 v[10:13], v[0:3], v[4:7], v[10:13]
-; GFX1170-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX1170-NEXT:    s_endpgm
-;
-; GFX12-LABEL: test_wmma_bf16_16x16x16_bf16_imm:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v10, 0x3f803f80
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v12, v10
-; GFX12-NEXT:    v_mov_b32_e32 v13, v10
-; GFX12-NEXT:    v_wmma_bf16_16x16x16_bf16 v[10:13], v[0:3], v[4:7], v[10:13]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_bf16_16x16x16_bf16_imm:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v10, 0x3f803f80
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v12, v10
+; GCN-NEXT:    v_mov_b32_e32 v13, v10
+; GCN-NEXT:    v_wmma_bf16_16x16x16_bf16 v[10:13], v[0:3], v[4:7], v[10:13]
+; GCN-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> <i16 16256, i16 16256, i16 16256, i16 16256, i16 16256, i16 16256, i16 16256, i16 16256>, i1 0)
   store <8 x i16> %res, ptr addrspace(1) %out
@@ -173,26 +115,15 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_imm_non_inlineable(<8 x i16> %A, <8 x i16> %B, ptr addrspace(1) %out) {
-; GFX1170-LABEL: test_wmma_bf16_16x16x16_bf16_imm_non_inlineable:
-; GFX1170:       ; %bb.0: ; %bb
-; GFX1170-NEXT:    v_mov_b32_e32 v10, 0x3fc03fc0
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX1170-NEXT:    v_mov_b32_e32 v11, v10
-; GFX1170-NEXT:    v_mov_b32_e32 v12, v10
-; GFX1170-NEXT:    v_mov_b32_e32 v13, v10
-; GFX1170-NEXT:    v_wmma_bf16_16x16x16_bf16 v[10:13], v[0:3], v[4:7], v[10:13]
-; GFX1170-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX1170-NEXT:    s_endpgm
-;
-; GFX12-LABEL: test_wmma_bf16_16x16x16_bf16_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v10, 0x3fc03fc0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v12, v10
-; GFX12-NEXT:    v_mov_b32_e32 v13, v10
-; GFX12-NEXT:    v_wmma_bf16_16x16x16_bf16 v[10:13], v[0:3], v[4:7], v[10:13]
-; GFX12-NEXT:    global_store_b128 v[8:9], v[10:13], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_bf16_16x16x16_bf16_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v10, 0x3fc03fc0
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v12, v10
+; GCN-NEXT:    v_mov_b32_e32 v13, v10
+; GCN-NEXT:    v_wmma_bf16_16x16x16_bf16 v[10:13], v[0:3], v[4:7], v[10:13]
+; GCN-NEXT:    global_store_b128 v[8:9], v[10:13], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> <i16 16320, i16 16320, i16 16320, i16 16320, i16 16320, i16 16320, i16 16320, i16 16320>, i1 0)
   store <8 x i16> %res, ptr addrspace(1) %out
@@ -214,37 +145,19 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu8_imm_non_inlineable(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX1170-LABEL: test_wmma_i32_16x16x16_iu8_imm_non_inlineable:
-; GFX1170:       ; %bb.0: ; %bb
-; GFX1170-NEXT:    v_mov_b32_e32 v6, 0x80
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_mov_b32_e32 v7, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v8, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v9, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v10, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v11, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v12, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v13, v6
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_wmma_i32_16x16x16_iu8 v[6:13], v[0:1], v[2:3], v[6:13]
-; GFX1170-NEXT:    s_clause 0x1
-; GFX1170-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX1170-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX1170-NEXT:    s_endpgm
-;
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu8_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v6, 0x80
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v8, v6
-; GFX12-NEXT:    v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v10, v6
-; GFX12-NEXT:    v_dual_mov_b32 v11, v6 :: v_dual_mov_b32 v12, v6
-; GFX12-NEXT:    v_mov_b32_e32 v13, v6
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu8 v[6:13], v[0:1], v[2:3], v[6:13]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu8_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v6, 0x80
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v8, v6
+; GCN-NEXT:    v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v10, v6
+; GCN-NEXT:    v_dual_mov_b32 v11, v6 :: v_dual_mov_b32 v12, v6
+; GCN-NEXT:    v_mov_b32_e32 v13, v6
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu8 v[6:13], v[0:1], v[2:3], v[6:13]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> <i32 128, i32 128, i32 128, i32 128, i32 128, i32 128, i32 128, i32 128>, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -266,37 +179,19 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x16_iu4_imm_non_inlineable(i32 %A, i32 %B, ptr addrspace(1) %out) {
-; GFX1170-LABEL: test_wmma_i32_16x16x16_iu4_imm_non_inlineable:
-; GFX1170:       ; %bb.0: ; %bb
-; GFX1170-NEXT:    v_mov_b32_e32 v4, 0x80
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_mov_b32_e32 v5, v4
-; GFX1170-NEXT:    v_mov_b32_e32 v6, v4
-; GFX1170-NEXT:    v_mov_b32_e32 v7, v4
-; GFX1170-NEXT:    v_mov_b32_e32 v8, v4
-; GFX1170-NEXT:    v_mov_b32_e32 v9, v4
-; GFX1170-NEXT:    v_mov_b32_e32 v10, v4
-; GFX1170-NEXT:    v_mov_b32_e32 v11, v4
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_wmma_i32_16x16x16_iu4 v[4:11], v0, v1, v[4:11]
-; GFX1170-NEXT:    s_clause 0x1
-; GFX1170-NEXT:    global_store_b128 v[2:3], v[8:11], off offset:16
-; GFX1170-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX1170-NEXT:    s_endpgm
-;
-; GFX12-LABEL: test_wmma_i32_16x16x16_iu4_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v4, 0x80
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v6, v4
-; GFX12-NEXT:    v_dual_mov_b32 v7, v4 :: v_dual_mov_b32 v8, v4
-; GFX12-NEXT:    v_dual_mov_b32 v9, v4 :: v_dual_mov_b32 v10, v4
-; GFX12-NEXT:    v_mov_b32_e32 v11, v4
-; GFX12-NEXT:    v_wmma_i32_16x16x16_iu4 v[4:11], v0, v1, v[4:11]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[2:3], v[8:11], off offset:16
-; GFX12-NEXT:    global_store_b128 v[2:3], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x16_iu4_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v4, 0x80
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v6, v4
+; GCN-NEXT:    v_dual_mov_b32 v7, v4 :: v_dual_mov_b32 v8, v4
+; GCN-NEXT:    v_dual_mov_b32 v9, v4 :: v_dual_mov_b32 v10, v4
+; GCN-NEXT:    v_mov_b32_e32 v11, v4
+; GCN-NEXT:    v_wmma_i32_16x16x16_iu4 v[4:11], v0, v1, v[4:11]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[2:3], v[8:11], off offset:16
+; GCN-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32(i1 0, i32 %A, i1 0, i32 %B, <8 x i32> <i32 128, i32 128, i32 128, i32 128, i32 128, i32 128, i32 128, i32 128>, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -318,37 +213,19 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8_imm_non_inlineable(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX1170-LABEL: test_wmma_f32_16x16x16_fp8_fp8_imm_non_inlineable:
-; GFX1170:       ; %bb.0: ; %bb
-; GFX1170-NEXT:    v_mov_b32_e32 v6, 0x40400000
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_mov_b32_e32 v7, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v8, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v9, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v10, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v11, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v12, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v13, v6
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[6:13], v[0:1], v[2:3], v[6:13]
-; GFX1170-NEXT:    s_clause 0x1
-; GFX1170-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX1170-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX1170-NEXT:    s_endpgm
-;
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v6, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v8, v6
-; GFX12-NEXT:    v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v10, v6
-; GFX12-NEXT:    v_dual_mov_b32 v11, v6 :: v_dual_mov_b32 v12, v6
-; GFX12-NEXT:    v_mov_b32_e32 v13, v6
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[6:13], v[0:1], v[2:3], v[6:13]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_fp8_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v6, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v8, v6
+; GCN-NEXT:    v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v10, v6
+; GCN-NEXT:    v_dual_mov_b32 v11, v6 :: v_dual_mov_b32 v12, v6
+; GCN-NEXT:    v_mov_b32_e32 v13, v6
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_fp8 v[6:13], v[0:1], v[2:3], v[6:13]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> <float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -370,37 +247,19 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8_imm_non_inlineable(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX1170-LABEL: test_wmma_f32_16x16x16_bf8_fp8_imm_non_inlineable:
-; GFX1170:       ; %bb.0: ; %bb
-; GFX1170-NEXT:    v_mov_b32_e32 v6, 0x40400000
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_mov_b32_e32 v7, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v8, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v9, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v10, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v11, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v12, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v13, v6
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[6:13], v[0:1], v[2:3], v[6:13]
-; GFX1170-NEXT:    s_clause 0x1
-; GFX1170-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX1170-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX1170-NEXT:    s_endpgm
-;
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v6, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v8, v6
-; GFX12-NEXT:    v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v10, v6
-; GFX12-NEXT:    v_dual_mov_b32 v11, v6 :: v_dual_mov_b32 v12, v6
-; GFX12-NEXT:    v_mov_b32_e32 v13, v6
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[6:13], v[0:1], v[2:3], v[6:13]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_fp8_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v6, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v8, v6
+; GCN-NEXT:    v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v10, v6
+; GCN-NEXT:    v_dual_mov_b32 v11, v6 :: v_dual_mov_b32 v12, v6
+; GCN-NEXT:    v_mov_b32_e32 v13, v6
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_fp8 v[6:13], v[0:1], v[2:3], v[6:13]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> <float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -422,37 +281,19 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8_imm_non_inlineable(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX1170-LABEL: test_wmma_f32_16x16x16_fp8_bf8_imm_non_inlineable:
-; GFX1170:       ; %bb.0: ; %bb
-; GFX1170-NEXT:    v_mov_b32_e32 v6, 0x40400000
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_mov_b32_e32 v7, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v8, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v9, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v10, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v11, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v12, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v13, v6
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[6:13], v[0:1], v[2:3], v[6:13]
-; GFX1170-NEXT:    s_clause 0x1
-; GFX1170-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX1170-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX1170-NEXT:    s_endpgm
-;
-; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v6, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v8, v6
-; GFX12-NEXT:    v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v10, v6
-; GFX12-NEXT:    v_dual_mov_b32 v11, v6 :: v_dual_mov_b32 v12, v6
-; GFX12-NEXT:    v_mov_b32_e32 v13, v6
-; GFX12-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[6:13], v[0:1], v[2:3], v[6:13]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_fp8_bf8_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v6, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v8, v6
+; GCN-NEXT:    v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v10, v6
+; GCN-NEXT:    v_dual_mov_b32 v11, v6 :: v_dual_mov_b32 v12, v6
+; GCN-NEXT:    v_mov_b32_e32 v13, v6
+; GCN-NEXT:    v_wmma_f32_16x16x16_fp8_bf8 v[6:13], v[0:1], v[2:3], v[6:13]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> <float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -474,37 +315,19 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8_imm_non_inlineable(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX1170-LABEL: test_wmma_f32_16x16x16_bf8_bf8_imm_non_inlineable:
-; GFX1170:       ; %bb.0: ; %bb
-; GFX1170-NEXT:    v_mov_b32_e32 v6, 0x40400000
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_mov_b32_e32 v7, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v8, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v9, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v10, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v11, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v12, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v13, v6
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[6:13], v[0:1], v[2:3], v[6:13]
-; GFX1170-NEXT:    s_clause 0x1
-; GFX1170-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX1170-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX1170-NEXT:    s_endpgm
-;
-; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v6, 0x40400000
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v8, v6
-; GFX12-NEXT:    v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v10, v6
-; GFX12-NEXT:    v_dual_mov_b32 v11, v6 :: v_dual_mov_b32 v12, v6
-; GFX12-NEXT:    v_mov_b32_e32 v13, v6
-; GFX12-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[6:13], v[0:1], v[2:3], v[6:13]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_f32_16x16x16_bf8_bf8_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v6, 0x40400000
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v8, v6
+; GCN-NEXT:    v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v10, v6
+; GCN-NEXT:    v_dual_mov_b32 v11, v6 :: v_dual_mov_b32 v12, v6
+; GCN-NEXT:    v_mov_b32_e32 v13, v6
+; GCN-NEXT:    v_wmma_f32_16x16x16_bf8_bf8 v[6:13], v[0:1], v[2:3], v[6:13]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> <float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>)
   store <8 x float> %res, ptr addrspace(1) %out
@@ -526,37 +349,19 @@ bb:
 }
 
 define amdgpu_ps void @test_wmma_i32_16x16x32_iu4_imm_non_inlineable(<2 x i32> %A, <2 x i32> %B, ptr addrspace(1) %out) {
-; GFX1170-LABEL: test_wmma_i32_16x16x32_iu4_imm_non_inlineable:
-; GFX1170:       ; %bb.0: ; %bb
-; GFX1170-NEXT:    v_mov_b32_e32 v6, 0x80
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_mov_b32_e32 v7, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v8, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v9, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v10, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v11, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v12, v6
-; GFX1170-NEXT:    v_mov_b32_e32 v13, v6
-; GFX1170-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1170-NEXT:    v_wmma_i32_16x16x32_iu4 v[6:13], v[0:1], v[2:3], v[6:13]
-; GFX1170-NEXT:    s_clause 0x1
-; GFX1170-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX1170-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX1170-NEXT:    s_endpgm
-;
-; GFX12-LABEL: test_wmma_i32_16x16x32_iu4_imm_non_inlineable:
-; GFX12:       ; %bb.0: ; %bb
-; GFX12-NEXT:    v_mov_b32_e32 v6, 0x80
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v8, v6
-; GFX12-NEXT:    v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v10, v6
-; GFX12-NEXT:    v_dual_mov_b32 v11, v6 :: v_dual_mov_b32 v12, v6
-; GFX12-NEXT:    v_mov_b32_e32 v13, v6
-; GFX12-NEXT:    v_wmma_i32_16x16x32_iu4 v[6:13], v[0:1], v[2:3], v[6:13]
-; GFX12-NEXT:    s_clause 0x1
-; GFX12-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
-; GFX12-NEXT:    global_store_b128 v[4:5], v[6:9], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: test_wmma_i32_16x16x32_iu4_imm_non_inlineable:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    v_mov_b32_e32 v6, 0x80
+; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
+; GCN-NEXT:    v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v8, v6
+; GCN-NEXT:    v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v10, v6
+; GCN-NEXT:    v_dual_mov_b32 v11, v6 :: v_dual_mov_b32 v12, v6
+; GCN-NEXT:    v_mov_b32_e32 v13, v6
+; GCN-NEXT:    v_wmma_i32_16x16x32_iu4 v[6:13], v[0:1], v[2:3], v[6:13]
+; GCN-NEXT:    s_clause 0x1
+; GCN-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GCN-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GCN-NEXT:    s_endpgm
 bb:
   %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> <i32 128, i32 128, i32 128, i32 128, i32 128, i32 128, i32 128, i32 128>, i1 0)
   store <8 x i32> %res, ptr addrspace(1) %out
@@ -574,3 +379,6 @@ declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32>
 declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float>)
 declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float>)
 declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 immarg, <2 x i32>, i1 immarg, <2 x i32>, <8 x i32>, i1 immarg)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX1170: {{.*}}
+; GFX12: {{.*}}
diff --git a/llvm/test/MC/AMDGPU/gfx1170_asm_features.s b/llvm/test/MC/AMDGPU/gfx1170_asm_features.s
new file mode 100644
index 0000000000000..3cc0cc9b74cf8
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1170_asm_features.s
@@ -0,0 +1,8 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1170 -show-encoding %s | FileCheck -check-prefix=GFX1170 %s
+
+//===----------------------------------------------------------------------===//
+// A VOPD OpY mov_b32 instruction uses SRC2 source-cache if OpX is also mov_b32
+//===----------------------------------------------------------------------===//
+
+v_dual_mov_b32 v2, v5 :: v_dual_mov_b32 v3, v1
+// GFX1170: encoding: [0x05,0x01,0x10,0xca,0x01,0x01,0x02,0x02]



More information about the llvm-commits mailing list