[llvm] 72c1a0d - [AMDGPU] Allow v_accvgpr_write to use SGPR on gfx90a

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 22 13:52:41 PDT 2022


Author: Stanislav Mekhanoshin
Date: 2022-03-22T13:52:29-07:00
New Revision: 72c1a0d9c224ac139ef5dae63f75ae8bbfbaa652

URL: https://github.com/llvm/llvm-project/commit/72c1a0d9c224ac139ef5dae63f75ae8bbfbaa652
DIFF: https://github.com/llvm/llvm-project/commit/72c1a0d9c224ac139ef5dae63f75ae8bbfbaa652.diff

LOG: [AMDGPU] Allow v_accvgpr_write to use SGPR on gfx90a

This is undocumented, but it should work.

Differential Revision: https://reviews.llvm.org/D122252

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
    llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir
    llvm/test/CodeGen/AMDGPU/agpr-copy-sgpr-no-vgprs.mir
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
    llvm/test/CodeGen/AMDGPU/mfma-loop.ll
    llvm/test/MC/AMDGPU/mai-gfx90a.s
    llvm/test/MC/AMDGPU/mai-gfx940.s
    llvm/test/MC/Disassembler/AMDGPU/mai-gfx90a.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index ef7c336b4dd83..34251fde11b64 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -3688,7 +3688,7 @@ bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
 
   auto Reg = mc2PseudoReg(Src0.getReg());
   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
-  if (!isGFX940() && isSGPR(Reg, TRI)) {
+  if (!isGFX90A() && isSGPR(Reg, TRI)) {
     Error(getRegLoc(Reg, Operands),
           "source operand must be either a VGPR or an inline constant");
     return false;

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 1310580756f4b..63b2eac3413b4 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -822,7 +822,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 
   if (RC == &AMDGPU::AGPR_32RegClass) {
     if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
-        (ST.hasGFX940Insts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
+        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
       BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg)
         .addReg(SrcReg, getKillRegState(KillSrc));
       return;
@@ -949,7 +949,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
       Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
     else if (RI.hasVGPRs(SrcRC) ||
-             (ST.hasGFX940Insts() && RI.isSGPRClass(SrcRC)))
+             (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
       Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
     else
       Opcode = AMDGPU::INSTRUCTION_LIST_END;
@@ -4647,7 +4647,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
   }
 
   if (MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
-      !ST.hasGFX940Insts()) {
+      !ST.hasGFX90AInsts()) {
     const MachineOperand *Src = getNamedOperand(MI, AMDGPU::OpName::src0);
     if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
       ErrInfo = "Invalid register class: "
@@ -5032,7 +5032,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
           RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR)
         return false;
     }
-    if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX940Insts() &&
+    if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
         (int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) &&
         RI.isSGPRReg(MRI, MO->getReg()))
       return false;

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
index 0c4c0a27b7adb..c0b5f13139434 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
@@ -22,70 +22,38 @@ define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(<32 x float> addrspace(1
 ; GCN-NEXT:    s_load_dwordx16 s[0:15], s[34:35], 0x0
 ; GCN-NEXT:    s_load_dwordx16 s[16:31], s[34:35], 0x40
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_mov_b32_e32 v4, s0
-; GCN-NEXT:    v_accvgpr_write_b32 a0, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s1
-; GCN-NEXT:    v_accvgpr_write_b32 a1, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s2
-; GCN-NEXT:    v_accvgpr_write_b32 a2, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s3
-; GCN-NEXT:    v_accvgpr_write_b32 a3, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s4
-; GCN-NEXT:    v_accvgpr_write_b32 a4, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s5
-; GCN-NEXT:    v_accvgpr_write_b32 a5, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s6
-; GCN-NEXT:    v_accvgpr_write_b32 a6, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s7
-; GCN-NEXT:    v_accvgpr_write_b32 a7, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s8
-; GCN-NEXT:    v_accvgpr_write_b32 a8, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s9
-; GCN-NEXT:    v_accvgpr_write_b32 a9, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s10
-; GCN-NEXT:    v_accvgpr_write_b32 a10, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s11
-; GCN-NEXT:    v_accvgpr_write_b32 a11, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s12
-; GCN-NEXT:    v_accvgpr_write_b32 a12, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s13
-; GCN-NEXT:    v_accvgpr_write_b32 a13, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s14
-; GCN-NEXT:    v_accvgpr_write_b32 a14, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s15
-; GCN-NEXT:    v_accvgpr_write_b32 a15, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s16
-; GCN-NEXT:    v_accvgpr_write_b32 a16, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s17
-; GCN-NEXT:    v_accvgpr_write_b32 a17, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s18
-; GCN-NEXT:    v_accvgpr_write_b32 a18, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s19
-; GCN-NEXT:    v_accvgpr_write_b32 a19, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s20
-; GCN-NEXT:    v_accvgpr_write_b32 a20, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s21
-; GCN-NEXT:    v_accvgpr_write_b32 a21, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s22
-; GCN-NEXT:    v_accvgpr_write_b32 a22, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s23
-; GCN-NEXT:    v_accvgpr_write_b32 a23, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s24
-; GCN-NEXT:    v_accvgpr_write_b32 a24, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s25
-; GCN-NEXT:    v_accvgpr_write_b32 a25, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s26
-; GCN-NEXT:    v_accvgpr_write_b32 a26, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s27
-; GCN-NEXT:    v_accvgpr_write_b32 a27, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s28
-; GCN-NEXT:    v_accvgpr_write_b32 a28, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s29
-; GCN-NEXT:    v_accvgpr_write_b32 a29, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s30
-; GCN-NEXT:    v_accvgpr_write_b32 a30, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s31
-; GCN-NEXT:    v_accvgpr_write_b32 a31, v4
+; GCN-NEXT:    v_accvgpr_write_b32 a0, s0
+; GCN-NEXT:    v_accvgpr_write_b32 a1, s1
+; GCN-NEXT:    v_accvgpr_write_b32 a2, s2
+; GCN-NEXT:    v_accvgpr_write_b32 a3, s3
+; GCN-NEXT:    v_accvgpr_write_b32 a4, s4
+; GCN-NEXT:    v_accvgpr_write_b32 a5, s5
+; GCN-NEXT:    v_accvgpr_write_b32 a6, s6
+; GCN-NEXT:    v_accvgpr_write_b32 a7, s7
+; GCN-NEXT:    v_accvgpr_write_b32 a8, s8
+; GCN-NEXT:    v_accvgpr_write_b32 a9, s9
+; GCN-NEXT:    v_accvgpr_write_b32 a10, s10
+; GCN-NEXT:    v_accvgpr_write_b32 a11, s11
+; GCN-NEXT:    v_accvgpr_write_b32 a12, s12
+; GCN-NEXT:    v_accvgpr_write_b32 a13, s13
+; GCN-NEXT:    v_accvgpr_write_b32 a14, s14
+; GCN-NEXT:    v_accvgpr_write_b32 a15, s15
+; GCN-NEXT:    v_accvgpr_write_b32 a16, s16
+; GCN-NEXT:    v_accvgpr_write_b32 a17, s17
+; GCN-NEXT:    v_accvgpr_write_b32 a18, s18
+; GCN-NEXT:    v_accvgpr_write_b32 a19, s19
+; GCN-NEXT:    v_accvgpr_write_b32 a20, s20
+; GCN-NEXT:    v_accvgpr_write_b32 a21, s21
+; GCN-NEXT:    v_accvgpr_write_b32 a22, s22
+; GCN-NEXT:    v_accvgpr_write_b32 a23, s23
+; GCN-NEXT:    v_accvgpr_write_b32 a24, s24
+; GCN-NEXT:    v_accvgpr_write_b32 a25, s25
+; GCN-NEXT:    v_accvgpr_write_b32 a26, s26
+; GCN-NEXT:    v_accvgpr_write_b32 a27, s27
+; GCN-NEXT:    v_accvgpr_write_b32 a28, s28
+; GCN-NEXT:    v_accvgpr_write_b32 a29, s29
+; GCN-NEXT:    v_accvgpr_write_b32 a30, s30
+; GCN-NEXT:    v_accvgpr_write_b32 a31, s31
 ; GCN-NEXT:    s_nop 1
 ; GCN-NEXT:    v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], v[2:3], a[0:31] cbsz:1 abid:2 blgp:3
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
@@ -121,38 +89,22 @@ define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(<16 x float> addrspace(1
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_load_dwordx16 s[0:15], s[16:17], 0x0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_mov_b32_e32 v4, s0
-; GCN-NEXT:    v_accvgpr_write_b32 a0, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s1
-; GCN-NEXT:    v_accvgpr_write_b32 a1, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s2
-; GCN-NEXT:    v_accvgpr_write_b32 a2, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s3
-; GCN-NEXT:    v_accvgpr_write_b32 a3, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s4
-; GCN-NEXT:    v_accvgpr_write_b32 a4, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s5
-; GCN-NEXT:    v_accvgpr_write_b32 a5, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s6
-; GCN-NEXT:    v_accvgpr_write_b32 a6, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s7
-; GCN-NEXT:    v_accvgpr_write_b32 a7, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s8
-; GCN-NEXT:    v_accvgpr_write_b32 a8, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s9
-; GCN-NEXT:    v_accvgpr_write_b32 a9, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s10
-; GCN-NEXT:    v_accvgpr_write_b32 a10, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s11
-; GCN-NEXT:    v_accvgpr_write_b32 a11, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s12
-; GCN-NEXT:    v_accvgpr_write_b32 a12, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s13
-; GCN-NEXT:    v_accvgpr_write_b32 a13, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s14
-; GCN-NEXT:    v_accvgpr_write_b32 a14, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s15
-; GCN-NEXT:    v_accvgpr_write_b32 a15, v4
+; GCN-NEXT:    v_accvgpr_write_b32 a0, s0
+; GCN-NEXT:    v_accvgpr_write_b32 a1, s1
+; GCN-NEXT:    v_accvgpr_write_b32 a2, s2
+; GCN-NEXT:    v_accvgpr_write_b32 a3, s3
+; GCN-NEXT:    v_accvgpr_write_b32 a4, s4
+; GCN-NEXT:    v_accvgpr_write_b32 a5, s5
+; GCN-NEXT:    v_accvgpr_write_b32 a6, s6
+; GCN-NEXT:    v_accvgpr_write_b32 a7, s7
+; GCN-NEXT:    v_accvgpr_write_b32 a8, s8
+; GCN-NEXT:    v_accvgpr_write_b32 a9, s9
+; GCN-NEXT:    v_accvgpr_write_b32 a10, s10
+; GCN-NEXT:    v_accvgpr_write_b32 a11, s11
+; GCN-NEXT:    v_accvgpr_write_b32 a12, s12
+; GCN-NEXT:    v_accvgpr_write_b32 a13, s13
+; GCN-NEXT:    v_accvgpr_write_b32 a14, s14
+; GCN-NEXT:    v_accvgpr_write_b32 a15, s15
 ; GCN-NEXT:    s_nop 1
 ; GCN-NEXT:    v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
@@ -183,14 +135,10 @@ define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(<4 x float> addrspace(1)*
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_mov_b32_e32 v4, s0
-; GCN-NEXT:    v_accvgpr_write_b32 a0, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s1
-; GCN-NEXT:    v_accvgpr_write_b32 a1, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s2
-; GCN-NEXT:    v_accvgpr_write_b32 a2, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s3
-; GCN-NEXT:    v_accvgpr_write_b32 a3, v4
+; GCN-NEXT:    v_accvgpr_write_b32 a0, s0
+; GCN-NEXT:    v_accvgpr_write_b32 a1, s1
+; GCN-NEXT:    v_accvgpr_write_b32 a2, s2
+; GCN-NEXT:    v_accvgpr_write_b32 a3, s3
 ; GCN-NEXT:    s_nop 1
 ; GCN-NEXT:    v_mfma_f32_4x4x4bf16_1k a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
@@ -217,38 +165,22 @@ define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(<16 x float> addrspace(1
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_load_dwordx16 s[0:15], s[16:17], 0x0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_mov_b32_e32 v4, s0
-; GCN-NEXT:    v_accvgpr_write_b32 a0, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s1
-; GCN-NEXT:    v_accvgpr_write_b32 a1, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s2
-; GCN-NEXT:    v_accvgpr_write_b32 a2, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s3
-; GCN-NEXT:    v_accvgpr_write_b32 a3, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s4
-; GCN-NEXT:    v_accvgpr_write_b32 a4, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s5
-; GCN-NEXT:    v_accvgpr_write_b32 a5, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s6
-; GCN-NEXT:    v_accvgpr_write_b32 a6, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s7
-; GCN-NEXT:    v_accvgpr_write_b32 a7, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s8
-; GCN-NEXT:    v_accvgpr_write_b32 a8, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s9
-; GCN-NEXT:    v_accvgpr_write_b32 a9, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s10
-; GCN-NEXT:    v_accvgpr_write_b32 a10, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s11
-; GCN-NEXT:    v_accvgpr_write_b32 a11, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s12
-; GCN-NEXT:    v_accvgpr_write_b32 a12, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s13
-; GCN-NEXT:    v_accvgpr_write_b32 a13, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s14
-; GCN-NEXT:    v_accvgpr_write_b32 a14, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s15
-; GCN-NEXT:    v_accvgpr_write_b32 a15, v4
+; GCN-NEXT:    v_accvgpr_write_b32 a0, s0
+; GCN-NEXT:    v_accvgpr_write_b32 a1, s1
+; GCN-NEXT:    v_accvgpr_write_b32 a2, s2
+; GCN-NEXT:    v_accvgpr_write_b32 a3, s3
+; GCN-NEXT:    v_accvgpr_write_b32 a4, s4
+; GCN-NEXT:    v_accvgpr_write_b32 a5, s5
+; GCN-NEXT:    v_accvgpr_write_b32 a6, s6
+; GCN-NEXT:    v_accvgpr_write_b32 a7, s7
+; GCN-NEXT:    v_accvgpr_write_b32 a8, s8
+; GCN-NEXT:    v_accvgpr_write_b32 a9, s9
+; GCN-NEXT:    v_accvgpr_write_b32 a10, s10
+; GCN-NEXT:    v_accvgpr_write_b32 a11, s11
+; GCN-NEXT:    v_accvgpr_write_b32 a12, s12
+; GCN-NEXT:    v_accvgpr_write_b32 a13, s13
+; GCN-NEXT:    v_accvgpr_write_b32 a14, s14
+; GCN-NEXT:    v_accvgpr_write_b32 a15, s15
 ; GCN-NEXT:    s_nop 1
 ; GCN-NEXT:    v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
@@ -280,14 +212,10 @@ define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(<4 x float> addrspace(1
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_mov_b32_e32 v4, s0
-; GCN-NEXT:    v_accvgpr_write_b32 a0, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s1
-; GCN-NEXT:    v_accvgpr_write_b32 a1, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s2
-; GCN-NEXT:    v_accvgpr_write_b32 a2, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s3
-; GCN-NEXT:    v_accvgpr_write_b32 a3, v4
+; GCN-NEXT:    v_accvgpr_write_b32 a0, s0
+; GCN-NEXT:    v_accvgpr_write_b32 a1, s1
+; GCN-NEXT:    v_accvgpr_write_b32 a2, s2
+; GCN-NEXT:    v_accvgpr_write_b32 a3, s3
 ; GCN-NEXT:    s_nop 1
 ; GCN-NEXT:    v_mfma_f32_16x16x16bf16_1k a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
@@ -337,22 +265,14 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(<4 x double> addrspace(1)* %
 ; GCN-NEXT:    s_load_dwordx8 s[0:7], s[8:9], 0x0
 ; GCN-NEXT:    v_pk_mov_b32 v[2:3], s[12:13], s[12:13] op_sel:[0,1]
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_mov_b32_e32 v4, s0
-; GCN-NEXT:    v_accvgpr_write_b32 a0, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s1
-; GCN-NEXT:    v_accvgpr_write_b32 a1, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s2
-; GCN-NEXT:    v_accvgpr_write_b32 a2, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s3
-; GCN-NEXT:    v_accvgpr_write_b32 a3, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s4
-; GCN-NEXT:    v_accvgpr_write_b32 a4, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s5
-; GCN-NEXT:    v_accvgpr_write_b32 a5, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s6
-; GCN-NEXT:    v_accvgpr_write_b32 a6, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s7
-; GCN-NEXT:    v_accvgpr_write_b32 a7, v4
+; GCN-NEXT:    v_accvgpr_write_b32 a0, s0
+; GCN-NEXT:    v_accvgpr_write_b32 a1, s1
+; GCN-NEXT:    v_accvgpr_write_b32 a2, s2
+; GCN-NEXT:    v_accvgpr_write_b32 a3, s3
+; GCN-NEXT:    v_accvgpr_write_b32 a4, s4
+; GCN-NEXT:    v_accvgpr_write_b32 a5, s5
+; GCN-NEXT:    v_accvgpr_write_b32 a6, s6
+; GCN-NEXT:    v_accvgpr_write_b32 a7, s7
 ; GCN-NEXT:    s_nop 1
 ; GCN-NEXT:    v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7] cbsz:1 abid:2 blgp:3
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
@@ -399,28 +319,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(<4 x double> addrspace(1
 ; GCN:       ; %bb.0: ; %bb
 ; GCN-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x24
 ; GCN-NEXT:    s_mov_b64 s[4:5], 0
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
 ; GCN-NEXT:    s_mov_b64 s[10:11], 1.0
 ; GCN-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GCN-NEXT:    s_mov_b64 s[8:9], s[4:5]
-; GCN-NEXT:    v_mov_b32_e32 v4, s4
-; GCN-NEXT:    v_accvgpr_write_b32 a0, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s5
-; GCN-NEXT:    v_accvgpr_write_b32 a1, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s6
-; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
-; GCN-NEXT:    v_accvgpr_write_b32 a2, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    v_pk_mov_b32 v[0:1], s[14:15], s[14:15] op_sel:[0,1]
-; GCN-NEXT:    v_accvgpr_write_b32 a3, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s8
-; GCN-NEXT:    v_accvgpr_write_b32 a4, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s9
-; GCN-NEXT:    v_accvgpr_write_b32 a5, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s10
-; GCN-NEXT:    v_accvgpr_write_b32 a6, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s11
-; GCN-NEXT:    v_accvgpr_write_b32 a7, v4
+; GCN-NEXT:    s_mov_b64 s[8:9], s[4:5]
+; GCN-NEXT:    v_accvgpr_write_b32 a0, s4
+; GCN-NEXT:    v_accvgpr_write_b32 a1, s5
+; GCN-NEXT:    v_accvgpr_write_b32 a2, s6
+; GCN-NEXT:    v_accvgpr_write_b32 a3, s7
+; GCN-NEXT:    v_accvgpr_write_b32 a4, s8
+; GCN-NEXT:    v_accvgpr_write_b32 a5, s9
+; GCN-NEXT:    v_accvgpr_write_b32 a6, s10
+; GCN-NEXT:    v_accvgpr_write_b32 a7, s11
 ; GCN-NEXT:    v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1]
 ; GCN-NEXT:    s_nop 1
 ; GCN-NEXT:    v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7]
@@ -442,29 +354,21 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(<4 x double> addrs
 ; GCN:       ; %bb.0: ; %bb
 ; GCN-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x24
 ; GCN-NEXT:    s_mov_b32 s4, 0
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
 ; GCN-NEXT:    s_mov_b32 s5, 0x405ec000
 ; GCN-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GCN-NEXT:    s_mov_b64 s[8:9], s[4:5]
-; GCN-NEXT:    s_mov_b64 s[10:11], s[4:5]
-; GCN-NEXT:    v_mov_b32_e32 v4, s4
-; GCN-NEXT:    v_accvgpr_write_b32 a0, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s5
-; GCN-NEXT:    v_accvgpr_write_b32 a1, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s6
-; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
-; GCN-NEXT:    v_accvgpr_write_b32 a2, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s7
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    v_pk_mov_b32 v[0:1], s[14:15], s[14:15] op_sel:[0,1]
-; GCN-NEXT:    v_accvgpr_write_b32 a3, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s8
-; GCN-NEXT:    v_accvgpr_write_b32 a4, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s9
-; GCN-NEXT:    v_accvgpr_write_b32 a5, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s10
-; GCN-NEXT:    v_accvgpr_write_b32 a6, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, s11
-; GCN-NEXT:    v_accvgpr_write_b32 a7, v4
+; GCN-NEXT:    s_mov_b64 s[8:9], s[4:5]
+; GCN-NEXT:    s_mov_b64 s[10:11], s[4:5]
+; GCN-NEXT:    v_accvgpr_write_b32 a0, s4
+; GCN-NEXT:    v_accvgpr_write_b32 a1, s5
+; GCN-NEXT:    v_accvgpr_write_b32 a2, s6
+; GCN-NEXT:    v_accvgpr_write_b32 a3, s7
+; GCN-NEXT:    v_accvgpr_write_b32 a4, s8
+; GCN-NEXT:    v_accvgpr_write_b32 a5, s9
+; GCN-NEXT:    v_accvgpr_write_b32 a6, s10
+; GCN-NEXT:    v_accvgpr_write_b32 a7, s11
 ; GCN-NEXT:    v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1]
 ; GCN-NEXT:    s_nop 1
 ; GCN-NEXT:    v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7]

diff  --git a/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir
index 63bd95332059c..7aaa0f2e2a341 100644
--- a/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir
@@ -531,8 +531,7 @@ body:             |
     ; GFX90A-LABEL: name: s_to_a
     ; GFX90A: liveins: $sgpr0
     ; GFX90A-NEXT: {{  $}}
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec
-    ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
+    ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $sgpr0, implicit $exec, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0
     ; GFX940-LABEL: name: s_to_a
     ; GFX940: liveins: $sgpr0
@@ -560,10 +559,8 @@ body:             |
     ; GFX90A-LABEL: name: s2_to_a2
     ; GFX90A: liveins: $sgpr0_sgpr1
     ; GFX90A-NEXT: {{  $}}
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1
-    ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1
-    ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
+    ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $sgpr0_sgpr1
+    ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1
     ; GFX940-LABEL: name: s2_to_a2
     ; GFX940: liveins: $sgpr0_sgpr1
@@ -594,12 +591,9 @@ body:             |
     ; GFX90A-LABEL: name: s3_to_a3
     ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2
     ; GFX90A-NEXT: {{  $}}
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
-    ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
-    ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2
-    ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
+    ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $sgpr0_sgpr1_sgpr2
+    ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
+    ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2
     ; GFX940-LABEL: name: s3_to_a3
     ; GFX940: liveins: $sgpr0_sgpr1_sgpr2
@@ -633,14 +627,10 @@ body:             |
     ; GFX90A-LABEL: name: s4_to_a4
     ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
     ; GFX90A-NEXT: {{  $}}
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
+    ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
     ; GFX940-LABEL: name: s4_to_a4
     ; GFX940: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
@@ -679,18 +669,12 @@ body:             |
     ; GFX90A-LABEL: name: s6_to_a6
     ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
     ; GFX90A-NEXT: {{  $}}
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
-    ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
-    ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
-    ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
-    ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
-    ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
-    ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
+    ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
+    ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
+    ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
+    ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
+    ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
+    ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
     ; GFX940-LABEL: name: s6_to_a6
     ; GFX940: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
@@ -735,22 +719,14 @@ body:             |
     ; GFX90A-LABEL: name: s8_to_a8
     ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
     ; GFX90A-NEXT: {{  $}}
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-    ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-    ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-    ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-    ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-    ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-    ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-    ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-    ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
+    ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr7, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
     ; GFX940-LABEL: name: s8_to_a8
     ; GFX940: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
@@ -813,38 +789,22 @@ body:             |
     ; GFX90A-LABEL: name: s16_to_a16
     ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
     ; GFX90A-NEXT: {{  $}}
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr7, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX90A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX90A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX90A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr11, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX90A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr12, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX90A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr13, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX90A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr14, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX90A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
-    ; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
+    ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr7, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX90A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $sgpr8, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX90A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $sgpr9, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX90A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $sgpr10, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX90A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $sgpr11, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX90A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $sgpr12, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX90A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $sgpr13, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX90A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $sgpr14, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $sgpr15, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
     ; GFX940-LABEL: name: s16_to_a16
     ; GFX940: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
@@ -1261,14 +1221,10 @@ body:             |
     ; GFX90A: liveins: $agpr0, $sgpr2_sgpr3
     ; GFX90A-NEXT: {{  $}}
     ; GFX90A-NEXT: S_NOP 0, implicit-def dead $sgpr0_sgpr1
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
+    ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
     ; GFX940-LABEL: name: copy_sgpr_to_agpr_tuple
     ; GFX940: liveins: $agpr0, $sgpr2_sgpr3
@@ -1307,14 +1263,10 @@ body:             |
     ; GFX90A: liveins: $agpr0, $sgpr2_sgpr3
     ; GFX90A-NEXT: {{  $}}
     ; GFX90A-NEXT: S_NOP 0, implicit-def dead $sgpr0_sgpr1
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
+    ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
     ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7
     ; GFX940-LABEL: name: copy_sgpr_to_agpr_tuple_kill
     ; GFX940: liveins: $agpr0, $sgpr2_sgpr3

diff  --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-sgpr-no-vgprs.mir b/llvm/test/CodeGen/AMDGPU/agpr-copy-sgpr-no-vgprs.mir
index 4986bd0f4b1bc..205599bfb66ee 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-copy-sgpr-no-vgprs.mir
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-sgpr-no-vgprs.mir
@@ -1,8 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass=postrapseudos -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX908 %s
-# RUN: not --crash llc -march=amdgcn -mcpu=gfx90a -run-pass=postrapseudos -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
-
-# CHECK: LLVM ERROR: Error while trying to spill VGPR0 from class VGPR_32: Cannot scavenge register without an emergency spill slot!
+# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass=postrapseudos -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX90A %s
 
 ---
 name: no_free_vgprs_for_copy_s32_to_a32
@@ -17,6 +15,11 @@ body: |
     ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr8, implicit $exec
     ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec
     ; GFX908-NEXT: S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr1
+    ; GFX90A-LABEL: name: no_free_vgprs_for_copy_s32_to_a32
+    ; GFX90A: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8
+    ; GFX90A-NEXT: {{  $}}
+    ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr8, implicit $exec
+    ; GFX90A-NEXT: S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr1
     $agpr1 = COPY $sgpr8
     S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr1
 ...
@@ -36,6 +39,12 @@ body: |
     ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr8_sgpr9
     ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec
     ; GFX908-NEXT: S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr2_agpr3
+    ; GFX90A-LABEL: name: no_free_vgprs_for_copy_s64_to_a64
+    ; GFX90A: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8_sgpr9
+    ; GFX90A-NEXT: {{  $}}
+    ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr8, implicit $exec, implicit-def $agpr2_agpr3, implicit $sgpr8_sgpr9
+    ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr9, implicit $exec, implicit $sgpr8_sgpr9
+    ; GFX90A-NEXT: S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr2_agpr3
     $agpr2_agpr3 = COPY $sgpr8_sgpr9
     S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr2_agpr3
 ...

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll
index 9a6ef61255d7f..95adef63bc6cb 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN %s
-; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX908 %s
+; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX908 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A %s
 
 declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16>, <2 x i16>, <32 x float>, i32, i32, i32)
@@ -14,38 +14,39 @@ declare i32 @llvm.amdgcn.workitem.id.x()
 ; GCN-DAG:         v_mov_b32_e32 [[ONE:v[0-9]+]], 1
 ; GCN-DAG:         s_load_dwordx16
 ; GCN-DAG:         s_load_dwordx16
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GCN-DAG:         v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX90A-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 ; GCN:             v_mfma_f32_32x32x2bf16 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX908-COUNT-32: v_accvgpr_read_b32
 ; GFX908:          global_store_dwordx4
@@ -65,7 +66,8 @@ bb:
 ; GCN-DAG:         v_mov_b32_e32 [[TWO:v[0-9]+]], 2
 ; GCN-DAG:         v_mov_b32_e32 [[ONE:v[0-9]+]], 1
 ; GCN:             s_load_dwordx16
-; GCN-COUNT-16:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 ; GCN:             v_mfma_f32_16x16x2bf16 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX908-COUNT-16: v_accvgpr_read_b32
 ; GFX908:          global_store_dwordx4
@@ -85,7 +87,8 @@ bb:
 ; GCN-DAG:        v_mov_b32_e32 [[TWO:v[0-9]+]], 2
 ; GCN-DAG:        v_mov_b32_e32 [[ONE:v[0-9]+]], 1
 ; GCN:            s_load_dwordx4
-; GCN-COUNT-4:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 ; GCN:            v_mfma_f32_4x4x2bf16 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX908-COUNT-4: v_accvgpr_read_b32
 ; GFX908:         global_store_dwordx4
@@ -105,7 +108,8 @@ bb:
 ; GCN-DAG:         v_mov_b32_e32 [[TWO:v[0-9]+]], 2
 ; GCN-DAG:         v_mov_b32_e32 [[ONE:v[0-9]+]], 1
 ; GCN:             s_load_dwordx16
-; GCN-COUNT-16:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX90A-COUNT-4:  v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 ; GCN:             v_mfma_f32_32x32x4bf16 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX908-COUNT-16: v_accvgpr_read_b32
 ; GFX908:          global_store_dwordx4
@@ -125,7 +129,8 @@ bb:
 ; GCN-DAG:        v_mov_b32_e32 [[TWO:v[0-9]+]], 2
 ; GCN-DAG:        v_mov_b32_e32 [[ONE:v[0-9]+]], 1
 ; GCN:            s_load_dwordx4
-; GCN-COUNT-4:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 ; GCN:            v_mfma_f32_16x16x8bf16 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX908-COUNT-4: v_accvgpr_read_b32
 ; GFX908:         global_store_dwordx4

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
index f79406a3fbf2f..4c6955a6f554a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
@@ -13,43 +13,9 @@ declare i32 @llvm.amdgcn.workitem.id.x()
 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x4bf16_1k:
 ; GCN-DAG:     s_load_dwordx16
 ; GCN-DAG:     s_load_dwordx16
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX90A-DAG:  v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX940-DAG:  v_mov_b32_e32 v[[TWO:[0-9]+]], 2
-; GFX940-DAG:  v_mov_b32_e32 v[[ONE:[0-9]+]], 1
-; GFX940-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
-; GFX90A-DAG:  v_mov_b32_e32 v[[TWO:[0-9]+]], 2
-; GFX90A-DAG:  v_mov_b32_e32 v[[ONE:[0-9]+]], 1
+; GCN-DAG:     v_mov_b32_e32 v[[TWO:[0-9]+]], 2
+; GCN-DAG:     v_mov_b32_e32 v[[ONE:[0-9]+]], 1
+; GCN-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 ; GFX90A:      v_mfma_f32_32x32x4bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX940:      v_mfma_f32_32x32x4_2b_bf16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GCN-NOT:     v_accvgpr_read_b32
@@ -65,15 +31,14 @@ bb:
 }
 
 ; GCN-LABEL: {{^}}test_mfma_f32_16x16x4bf16_1k:
-; GCN-DAG:         s_load_dwordx16
-; GCN-DAG:         v_mov_b32_e32 v[[TWO:[0-9]+]], 2
-; GCN-DAG:         v_mov_b32_e32 v[[ONE:[0-9]+]], 1
-; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX940-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
-; GFX90A:          v_mfma_f32_16x16x4bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
-; GFX940:          v_mfma_f32_16x16x4_4b_bf16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
-; GCN-NOT:         v_accvgpr_read_b32
-; GCN-COUNT-4:     global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
+; GCN-DAG:      s_load_dwordx16
+; GCN-DAG:      v_mov_b32_e32 v[[TWO:[0-9]+]], 2
+; GCN-DAG:      v_mov_b32_e32 v[[ONE:[0-9]+]], 1
+; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
+; GFX90A:       v_mfma_f32_16x16x4bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
+; GFX940:       v_mfma_f32_16x16x4_4b_bf16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
+; GCN-NOT:      v_accvgpr_read_b32
+; GCN-COUNT-4:  global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
 define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(<16 x float> addrspace(1)* %arg) #0 {
 bb:
   %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg
@@ -85,15 +50,14 @@ bb:
 }
 
 ; GCN-LABEL: {{^}}test_mfma_f32_4x4x4bf16_1k:
-; GCN-DAG:        s_load_dwordx4
-; GCN-DAG:        v_mov_b32_e32 v[[TWO:[0-9]+]], 2
-; GCN-DAG:        v_mov_b32_e32 v[[ONE:[0-9]+]], 1
-; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX940-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
-; GFX90A:         v_mfma_f32_4x4x4bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
-; GFX940:         v_mfma_f32_4x4x4_16b_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
-; GCN-NOT:        v_accvgpr_read_b32
-; GCN:            global_store_dwordx4 v{{[0-9]+}}, [[RES]],
+; GCN-DAG:     s_load_dwordx4
+; GCN-DAG:     v_mov_b32_e32 v[[TWO:[0-9]+]], 2
+; GCN-DAG:     v_mov_b32_e32 v[[ONE:[0-9]+]], 1
+; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
+; GFX90A:      v_mfma_f32_4x4x4bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
+; GFX940:      v_mfma_f32_4x4x4_16b_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
+; GCN-NOT:     v_accvgpr_read_b32
+; GCN:         global_store_dwordx4 v{{[0-9]+}}, [[RES]],
 define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(<4 x float> addrspace(1)* %arg) #0 {
 bb:
   %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg
@@ -105,15 +69,14 @@ bb:
 }
 
 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x8bf16_1k:
-; GCN-DAG:         s_load_dwordx16
-; GCN-DAG:         v_mov_b32_e32 v[[TWO:[0-9]+]], 2
-; GCN-DAG:         v_mov_b32_e32 v[[ONE:[0-9]+]], 1
-; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX940-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
-; GFX90A:          v_mfma_f32_32x32x8bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
-; GFX940:          v_mfma_f32_32x32x8_bf16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
-; GCN-NOT:         v_accvgpr_read_b32
-; GCN-COUNT-4:     global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
+; GCN-DAG:      s_load_dwordx16
+; GCN-DAG:      v_mov_b32_e32 v[[TWO:[0-9]+]], 2
+; GCN-DAG:      v_mov_b32_e32 v[[ONE:[0-9]+]], 1
+; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
+; GFX90A:       v_mfma_f32_32x32x8bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
+; GFX940:       v_mfma_f32_32x32x8_bf16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
+; GCN-NOT:      v_accvgpr_read_b32
+; GCN-COUNT-4:  global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
 define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(<16 x float> addrspace(1)* %arg) #0 {
 bb:
   %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg
@@ -125,15 +88,14 @@ bb:
 }
 
 ; GCN-LABEL: {{^}}test_mfma_f32_16x16x16bf16_1k:
-; GCN-DAG:        s_load_dwordx4
-; GCN-DAG:        v_mov_b32_e32 v[[TWO:[0-9]+]], 2
-; GCN-DAG:        v_mov_b32_e32 v[[ONE:[0-9]+]], 1
-; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX940-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
-; GFX90A:         v_mfma_f32_16x16x16bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
-; GFX940:         v_mfma_f32_16x16x16_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
-; GCN-NOT:        v_accvgpr_read_b32
-; GCN:            global_store_dwordx4 v{{[0-9]+}}, [[RES]],
+; GCN-DAG:     s_load_dwordx4
+; GCN-DAG:     v_mov_b32_e32 v[[TWO:[0-9]+]], 2
+; GCN-DAG:     v_mov_b32_e32 v[[ONE:[0-9]+]], 1
+; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
+; GFX90A:      v_mfma_f32_16x16x16bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
+; GFX940:      v_mfma_f32_16x16x16_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
+; GCN-NOT:     v_accvgpr_read_b32
+; GCN:         global_store_dwordx4 v{{[0-9]+}}, [[RES]],
 define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(<4 x float> addrspace(1)* %arg) #0 {
 bb:
   %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg
@@ -200,13 +162,12 @@ bb:
 }
 
 ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_lit:
-; GCN-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}}
-; GFX90A-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x405ec000
-; GFX940-DAG: s_mov_b32 s{{[0-9]+}}, 0x405ec000
-; GFX90A:     v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}}
-; GFX940:     v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}}
-; GCN:        global_store_dwordx4
-; GCN:        global_store_dwordx4
+; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}}
+; GCN-DAG: s_mov_b32 s{{[0-9]+}}, 0x405ec000
+; GFX90A:  v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}}
+; GFX940:  v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}}
+; GCN:     global_store_dwordx4
+; GCN:     global_store_dwordx4
 define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(<4 x double> addrspace(1)* %arg, double %a, double %b) #0 {
 bb:
   %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> <double 123.0, double 123.0, double 123.0, double 123.0>, i32 0, i32 0, i32 0)

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll
index 9e59d18c68fd8..ee041ca226c3c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN %s
-; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX908 %s
+; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX908 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A %s
 
 declare <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32, i32, <16 x i32>, i32, i32, i32)
@@ -9,7 +9,8 @@ declare <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32, i32, <4 x i32>, i32, i32
 ; GCN-DAG:         v_mov_b32_e32 [[TWO:v[0-9]+]], 2
 ; GCN-DAG:         v_mov_b32_e32 [[ONE:v[0-9]+]], 1
 ; GCN:             s_load_dwordx16
-; GCN-COUNT-16:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 ; GCN:             v_mfma_i32_32x32x8i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX908-COUNT-16: v_accvgpr_read_b32
 ; GFX908:          global_store_dwordx4
@@ -27,7 +28,8 @@ bb:
 ; GCN-DAG:        v_mov_b32_e32 [[TWO:v[0-9]+]], 2
 ; GCN-DAG:        v_mov_b32_e32 [[ONE:v[0-9]+]], 1
 ; GCN:            s_load_dwordx4
-; GCN-COUNT-4:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 ; GCN:            v_mfma_i32_16x16x16i8 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX908-COUNT-4: v_accvgpr_read_b32
 ; GFX908:         global_store_dwordx4

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
index 193737dc06d90..a943e5f414551 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
@@ -23,39 +23,39 @@ declare i32 @llvm.amdgcn.workitem.id.x()
 ; GCN-DAG:        v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
 ; GCN-DAG:        s_load_dwordx16
 ; GCN-DAG:        s_load_dwordx16
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX940-COUNT-32:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:     v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX90A_40-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 ; GFX908_A:       v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX940:         v_mfma_f32_32x32x1_2b_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX908-COUNT-4: v_accvgpr_read_b32
@@ -80,8 +80,8 @@ bb:
 ; GCN-DAG:           v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
 ; GCN-DAG:           v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
 ; GCN:               s_load_dwordx16
-; GFX908_A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX940-COUNT-16:   v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
+; GFX908-COUNT-16:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 ; GFX908_A:          v_mfma_f32_16x16x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX940:            v_mfma_f32_16x16x1_4b_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX908-COUNT:      v_accvgpr_read_b32
@@ -100,8 +100,8 @@ bb:
 ; GCN-DAG:          v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
 ; GCN-DAG:          v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
 ; GCN:              s_load_dwordx4
-; GFX908_A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX940-COUNT-4:   v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
+; GFX908-COUNT-4:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX90A_40-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 ; GFX908_A:         v_mfma_f32_4x4x1f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX940:           v_mfma_f32_4x4x1_16b_f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX908-COUNT-4:   v_accvgpr_read_b32
@@ -120,8 +120,8 @@ bb:
 ; GCN-DAG:           v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
 ; GCN-DAG:           v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
 ; GCN:               s_load_dwordx16
-; GFX908_A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX940-COUNT-16:   v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
+; GFX908-COUNT-16:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 ; GFX908_A:          v_mfma_f32_32x32x2f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX940:            v_mfma_f32_32x32x2_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX908-COUNT-16:   v_accvgpr_read_b32
@@ -140,8 +140,8 @@ bb:
 ; GCN-DAG:          v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
 ; GCN-DAG:          v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
 ; GCN:              s_load_dwordx4
-; GFX908_A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX940-COUNT-4:   v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
+; GFX908-COUNT-4:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX90A_40-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 ; GFX908_A:         v_mfma_f32_16x16x4f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX940:           v_mfma_f32_16x16x4_f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX908-COUNT-4:   v_accvgpr_read_b32
@@ -159,8 +159,8 @@ bb:
 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x4f16:
 ; GCN-DAG:           s_load_dwordx16
 ; GCN-DAG:           s_load_dwordx16
-; GFX908_A-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX940-COUNT-32:   v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
+; GFX908-COUNT-32:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX90A_40-COUNT-32:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 ; GFX908_A:          v_mfma_f32_32x32x4f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX940:            v_mfma_f32_32x32x4_2b_f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX908-COUNT-32:   v_accvgpr_read_b32
@@ -180,8 +180,8 @@ bb:
 
 ; GCN-LABEL: {{^}}test_mfma_f32_16x16x4f16:
 ; GCN:               s_load_dwordx16
-; GFX908_A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX940-COUNT-16:   v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
+; GFX908-COUNT-16:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 ; GFX908_A:          v_mfma_f32_16x16x4f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX940:            v_mfma_f32_16x16x4_4b_f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX908-COUNT-16:   v_accvgpr_read_b32
@@ -202,8 +202,8 @@ bb:
 ; GCN-LABEL: {{^}}test_mfma_f32_4x4x4f16:
 ; GCN:              s_load_dwordx4
 ; GCN:              s_load_dwordx4
-; GFX908_A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX940-COUNT-4:   v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
+; GFX908-COUNT-4:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX90A_40-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 ; GFX908_A:         v_mfma_f32_4x4x4f16 [[RES:a\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX940:           v_mfma_f32_4x4x4_16b_f16 [[RES:a\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX908-COUNT-4:   v_accvgpr_read_b32
@@ -225,8 +225,8 @@ bb:
 ; GCN:               s_load_dwordx16
 ; GCN:               s_waitcnt lgkmcnt(0)
 ; GFX908_A:          v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
-; GFX908_A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX940-COUNT-16:   v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
+; GFX908-COUNT-16:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 ; GFX908_A:          v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX940:            v_mfma_f32_32x32x8_f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX908-COUNT-16:   v_accvgpr_read_b32
@@ -247,8 +247,8 @@ bb:
 ; GCN-LABEL: {{^}}test_mfma_f32_16x16x16f16:
 ; GCN:              s_load_dwordx4
 ; GCN:              s_load_dwordx4
-; GFX908_A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX940-COUNT-4:   v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
+; GFX908-COUNT-4:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX90A_40-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 ; GFX908_A:         v_mfma_f32_16x16x16f16 [[RES:a\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX940:           v_mfma_f32_16x16x16_f16 [[RES:a\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX908-COUNT-4:   v_accvgpr_read_b32
@@ -271,39 +271,39 @@ bb:
 ; GCN-DAG:         v_mov_b32_e32 [[ONE:v[0-9]+]], 1
 ; GCN-DAG:         s_load_dwordx16
 ; GCN-DAG:         s_load_dwordx16
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908_A-DAG:    v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX940-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX908-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX90A_40-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 ; GFX908_A:        v_mfma_i32_32x32x4i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX940:          v_mfma_i32_32x32x4_2b_i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX908-COUNT-32: v_accvgpr_read_b32
@@ -322,8 +322,8 @@ bb:
 ; GCN-DAG:           v_mov_b32_e32 [[TWO:v[0-9]+]], 2
 ; GCN-DAG:           v_mov_b32_e32 [[ONE:v[0-9]+]], 1
 ; GCN:               s_load_dwordx16
-; GFX908_A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX940-COUNT-16:   v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
+; GFX908-COUNT-16:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 ; GFX908_A:          v_mfma_i32_16x16x4i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX940:            v_mfma_i32_16x16x4_4b_i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX908-COUNT-16:   v_accvgpr_read_b32
@@ -342,8 +342,8 @@ bb:
 ; GCN-DAG:          v_mov_b32_e32 [[TWO:v[0-9]+]], 2
 ; GCN-DAG:          v_mov_b32_e32 [[ONE:v[0-9]+]], 1
 ; GCN:              s_load_dwordx4
-; GFX908_A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX940-COUNT-4:   v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
+; GFX908-COUNT-4:   v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
+; GFX90A_40-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 ; GFX908_A:         v_mfma_i32_4x4x4i8 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX940:           v_mfma_i32_4x4x4_16b_i8 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX908-COUNT-4:   v_accvgpr_read_b32
@@ -594,8 +594,8 @@ bb:
 }
 
 ; GCN-LABEL: {{^}}test_mfma_f32_4x4x1f32_lit_splat:
-; GFX908_A:       v_mov_b32_e32 [[TMP:v[0-9]+]], 0x42f60000
-; GFX940:         s_mov_b32 [[TMP:s[0-9]+]], 0x42f60000
+; GFX908:         v_mov_b32_e32 [[TMP:v[0-9]+]], 0x42f60000
+; GFX90A_40:      s_mov_b32 [[TMP:s[0-9]+]], 0x42f60000
 ; GCN:            v_accvgpr_write_b32 [[TTMPA:a[0-9]+]], [[TMP]]
 ; GFX908:         v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
 ; GFX908:         v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
@@ -620,8 +620,8 @@ bb:
 }
 
 ; GCN-LABEL: {{^}}test_mfma_f32_4x4x1f32_lit_splat_bad_code:
-; GFX908_A: v_mov_b32_e32 [[TMP0:v[0-9]+]], 0x42f60000
-; GFX940:   s_mov_b32 [[TMP0:s[0-9]+]], 0x42f60000
+; GFX908:   v_mov_b32_e32 [[TMP0:v[0-9]+]], 0x42f60000
+; GFX90A_40:s_mov_b32 [[TMP0:s[0-9]+]], 0x42f60000
 ; GCN:      v_accvgpr_write_b32 [[AGPR:a[0-9]+]], [[TMP0]]
 ; GFX90A_40-COUNT-3: v_accvgpr_mov_b32 a{{[0-9]+}}, [[AGPR]]
 ; GFX908-NEXT:   v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP0]]

diff  --git a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
index ba77723ccdddf..61ce2689f333d 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX908,GFX908_A %s
-; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A,GFX908_A %s
-; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX940 %s
+; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A,GFX908_A,GFX940_A %s
+; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX940,GFX940_A %s
 
 ; GCN-LABEL: {{^}}test_mfma_loop_zeroinit:
 
@@ -47,8 +47,8 @@ exit:
 ; 3 vgprs are needed to avoid wait states between writes.
 ; Check that we do not use 32 temp sgprs as well.
 
-; GFX908_A:        v_mov_b32_e32 [[TMP:v[0-9]+]], 0x42f60000
-; GFX940:          s_mov_b32 [[TMP:s[0-9]+]], 0x42f60000
+; GFX908:          v_mov_b32_e32 [[TMP:v[0-9]+]], 0x42f60000
+; GFX940_A:        s_mov_b32 [[TMP:s[0-9]+]], 0x42f60000
 ; GFX908-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
 ; GFX90A:          v_accvgpr_write_b32 [[LEAD:a[0-9]+]], [[TMP]]
 ; GFX90A-COUNT-31: v_accvgpr_mov_b32 a{{[0-9]+}}, [[LEAD]]
@@ -188,73 +188,8 @@ exit:
 ; GFX908: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
 ; GFX908: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
 
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
-; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-
-; GFX940-COUNT-32: s_mov_b32 s{{[0-9]+}}, 0x4{{[0-9a-f]+}}
-; GFX940-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
+; GFX940_A-COUNT-32: s_mov_b32 s{{[0-9]+}}, 0x4{{[0-9a-f]+}}
+; GFX940_A-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
 
 ; GCN: [[LOOP:.LBB[0-9_]+]]:
 ; GCN-NOT:  v_accvgpr
@@ -355,10 +290,9 @@ exit:
 
 ; GCN-LABEL: {{^}}test_mfma_loop_sgpr_init:
 
-; GFX908_A:        v_mov_b32_e32 [[TMP:v[0-9]+]], s{{[0-9]+}}
+; GFX908:          v_mov_b32_e32 [[TMP:v[0-9]+]], s{{[0-9]+}}
 ; GFX908-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
-; GFX90A:          v_accvgpr_write_b32 [[LEAD:a[0-9]+]], [[TMP]]
-; GFX940:          v_accvgpr_write_b32 [[LEAD:a[0-9]+]], s{{[0-9]+}}
+; GFX940_A:        v_accvgpr_write_b32 [[LEAD:a[0-9]+]], s{{[0-9]+}}
 ; GFX90A-COUNT-31: v_accvgpr_mov_b32 a{{[0-9]+}}, [[LEAD]]
 
 ; GCN: [[LOOP:.LBB[0-9_]+]]:
@@ -426,8 +360,8 @@ exit:
 ; GCN-LABEL: {{^}}test_mfma_loop_mixed_init:
 
 ; GCN-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, v0
-; GFX908_A-DAG: v_mov_b32_e32 [[TMP:v[0-9]+]], s{{[0-9]+}}
-; GFX940-DAG:   s_load_dword [[TMP:s[0-9]+]],
+; GFX908-DAG:   v_mov_b32_e32 [[TMP:v[0-9]+]], s{{[0-9]+}}
+; GFX940_A-DAG: s_load_dword [[TMP:s[0-9]+]],
 ; GCN-DAG:      v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
 ; GFX908-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}}
 ; GFX908-DAG:   v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}}

diff  --git a/llvm/test/MC/AMDGPU/mai-gfx90a.s b/llvm/test/MC/AMDGPU/mai-gfx90a.s
index db116f8a20cd2..5314c76074172 100644
--- a/llvm/test/MC/AMDGPU/mai-gfx90a.s
+++ b/llvm/test/MC/AMDGPU/mai-gfx90a.s
@@ -27,6 +27,9 @@ v_accvgpr_write a2, v255
 v_accvgpr_mov_b32 a1, a2
 // GFX90A: v_accvgpr_mov_b32 a1, a2        ; encoding: [0x02,0xa5,0x02,0x7e]
 
+v_accvgpr_write_b32 a10, s20
+// GFX940: v_accvgpr_write_b32 a10, s20    ; encoding: [0x0a,0x40,0xd9,0xd3,0x14,0x00,0x00,0x18]
+
 v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[34:65]
 // GFX90A: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[34:65] ; encoding: [0x00,0x80,0xc0,0xd3,0x00,0x03,0x8a,0x04]
 

diff  --git a/llvm/test/MC/AMDGPU/mai-gfx940.s b/llvm/test/MC/AMDGPU/mai-gfx940.s
index b76de2dad6a27..6570b6229bc24 100644
--- a/llvm/test/MC/AMDGPU/mai-gfx940.s
+++ b/llvm/test/MC/AMDGPU/mai-gfx940.s
@@ -3,7 +3,6 @@
 
 v_accvgpr_write_b32 a10, s20
 // GFX940: v_accvgpr_write_b32 a10, s20    ; encoding: [0x0a,0x40,0xd9,0xd3,0x14,0x00,0x00,0x18]
-// GFX90A: error: source operand must be either a VGPR or an inline constant
 
 v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3]
 // GFX940: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x14]

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/mai-gfx90a.txt b/llvm/test/MC/Disassembler/AMDGPU/mai-gfx90a.txt
index ed6a60b634ee7..6d4a4760d8eb1 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/mai-gfx90a.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/mai-gfx90a.txt
@@ -21,6 +21,9 @@
 # GFX90A: v_accvgpr_mov_b32 a1, a2 ; encoding: [0x02,0xa5,0x02,0x7e]
 0x02,0xa5,0x02,0x7e
 
+# GFX940: v_accvgpr_write_b32 a10, s20 ; encoding: [0x0a,0x40,0xd9,0xd3,0x14,0x00,0x00,0x18]
+0x0a,0x40,0xd9,0xd3,0x14,0x00,0x00,0x18
+
 # GFX90A: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[2:33] ; encoding: [0x00,0x80,0xc0,0xd3,0x00,0x03,0x0a,0x04]
 0x00,0x80,0xc0,0xd3,0x00,0x03,0x0a,0x04
 


        


More information about the llvm-commits mailing list