[llvm] [AMDGPU] Inplace FI elimination during PEI for scalar copy instruction (PR #99556)

Pankaj Dwivedi via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 24 12:55:37 PDT 2024


https://github.com/PankajDwivedi-25 updated https://github.com/llvm/llvm-project/pull/99556

>From 4c43e74e9d3a7cd217b542c8a0eadc1ef83eede3 Mon Sep 17 00:00:00 2001
From: PankajDwivedi-25 <pankajkumar.divedi at amd.com>
Date: Fri, 19 Jul 2024 01:07:54 +0530
Subject: [PATCH 1/8] [AMDGPU] Inplace FI elimination during PEI for scalar
 copy instruction

---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     |  70 +++-
 llvm/test/CodeGen/AMDGPU/frame-index.mir      | 324 ++++++++++++++++++
 .../CodeGen/AMDGPU/reduce-frame-index.mir     | 176 ++++++++++
 3 files changed, 552 insertions(+), 18 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 8a315aa822786..05153383a3a21 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2449,7 +2449,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                             ? &AMDGPU::SReg_32RegClass
                                             : &AMDGPU::VGPR_32RegClass;
         bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
-                      MI->getOpcode() == AMDGPU::V_MOV_B32_e64;
+                      MI->getOpcode() == AMDGPU::V_MOV_B32_e64 ||
+                      MI->getOpcode() == AMDGPU::S_MOV_B32;
         Register ResultReg =
             IsCopy ? MI->getOperand(0).getReg()
                    : RS->scavengeRegisterBackwards(*RC, MI, false, 0);
@@ -2458,7 +2459,13 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
         if (Offset == 0) {
           unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
                                                : AMDGPU::V_LSHRREV_B32_e64;
-          auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), ResultReg);
+          Register TmpResultReg = ResultReg;
+          if (IsSALU && LiveSCC) {
+            TmpResultReg = RS->scavengeRegisterBackwards(
+                AMDGPU::VGPR_32RegClass, MI, false, 0);
+          }
+
+          auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), TmpResultReg);
           if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
             // For V_LSHRREV, the operands are reversed (the shift count goes
             // first).
@@ -2468,11 +2475,13 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
           if (IsSALU && !LiveSCC)
             Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
           if (IsSALU && LiveSCC) {
-            Register NewDest = RS->scavengeRegisterBackwards(
-                AMDGPU::SReg_32RegClass, Shift, false, 0);
+            Register NewDest =
+                IsCopy ? ResultReg
+                       : RS->scavengeRegisterBackwards(AMDGPU::SReg_32RegClass,
+                                                       Shift, false, 0);
             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                     NewDest)
-                .addReg(ResultReg);
+                .addReg(TmpResultReg);
             ResultReg = NewDest;
           }
         } else {
@@ -2523,22 +2532,47 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
 
             // We may have 1 free scratch SGPR even though a carry out is
             // unavailable. Only one additional mov is needed.
-            Register TmpScaledReg = RS->scavengeRegisterBackwards(
-                AMDGPU::SReg_32_XM0RegClass, MI, false, 0, false);
-            Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
-
-            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
-              .addReg(FrameReg)
-              .addImm(ST.getWavefrontSizeLog2());
-            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
-                .addReg(ScaledReg, RegState::Kill)
-                .addImm(Offset);
+            Register TmpScaledReg =
+                IsCopy && IsSALU
+                    ? ResultReg
+                    : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
+                                                    MI, false, 0, false);
+            Register ScaledReg =
+                TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
+            Register TmpResultReg = ScaledReg;
+
+            if (!LiveSCC) {
+              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), TmpResultReg)
+                  .addReg(FrameReg)
+                  .addImm(ST.getWavefrontSizeLog2());
+              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpResultReg)
+                  .addReg(TmpResultReg, RegState::Kill)
+                  .addImm(Offset);
+            } else {
+              TmpResultReg = RS->scavengeRegisterBackwards(
+                  AMDGPU::VGPR_32RegClass, MI, false, 0);
+
+              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHR_B32_e64),
+                      TmpResultReg)
+                  .addImm(ST.getWavefrontSizeLog2())
+                  .addReg(FrameReg);
+              auto Add = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e32),
+                                 TmpResultReg);
+              Add.addImm(Offset).addReg(TmpResultReg, RegState::Kill);
+              Register NewDest =
+                  IsCopy ? ResultReg
+                         : RS->scavengeRegisterBackwards(
+                               AMDGPU::SReg_32RegClass, Add, false, 0);
+              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+                      NewDest)
+                  .addReg(TmpResultReg);
+              ResultReg = NewDest;
+            }
             if (!IsSALU)
               BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
-                  .addReg(ScaledReg, RegState::Kill);
+                  .addReg(TmpResultReg, RegState::Kill);
             else
-              ResultReg = ScaledReg;
-
+              ResultReg = TmpResultReg;
             // If there were truly no free SGPRs, we need to undo everything.
             if (!TmpScaledReg.isValid()) {
               BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir
index d8736c5276a26..f4476a497f754 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir
@@ -211,3 +211,327 @@ body:             |
     renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
 ...
 
+---
+name: materialize_fi_s_mov_b32_offset_0_dead_scc
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_0_dead_scc
+    ; GCN: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+    ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4
+  renamable $sgpr4 = S_MOV_B32 %stack.0
+  S_ENDPGM 0, implicit $sgpr4
+
+...
+
+---
+name: materialize_fi_s_mov_b32_offset_0_live_scc
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    liveins: $sgpr4, $sgpr5
+
+    ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc
+    ; GCN: liveins: $sgpr4, $sgpr5
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GCN-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+    ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+  S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+  renamable $sgpr4 = S_MOV_B32 %stack.0
+  S_ENDPGM 0, implicit $sgpr4, implicit $scc
+
+...
+
+# FI#0 is filler to get a non-0 offset for FI#1
+---
+name: materialize_fi_s_mov_b32_offset_64_dead_scc
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: default, size: 64, alignment: 16, stack-id: default }
+  - { id: 1, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_64_dead_scc
+    ; GCN: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+    ; GCN-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 64, implicit-def $scc
+    ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4
+  renamable $sgpr4 = S_MOV_B32 %stack.1
+  S_ENDPGM 0, implicit $sgpr4
+
+...
+
+---
+name: materialize_fi_s_mov_b32_offset_68_dead_scc
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: default, size: 68, alignment: 16, stack-id: default }
+  - { id: 1, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_68_dead_scc
+    ; GCN: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+    ; GCN-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc
+    ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4
+  renamable $sgpr4 = S_MOV_B32 %stack.1
+  S_ENDPGM 0, implicit $sgpr4
+
+...
+
+---
+name: materialize_fi_s_mov_b32_offset_64_live_scc
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: default, size: 64, alignment: 16, stack-id: default }
+  - { id: 1, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    liveins: $sgpr4, $sgpr5
+
+    ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc
+    ; GCN: liveins: $sgpr4, $sgpr5
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+    ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+  S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+  renamable $sgpr4 = S_MOV_B32 %stack.1
+  S_ENDPGM 0, implicit $sgpr4, implicit $scc
+
+...
+
+---
+name: materialize_fi_s_mov_b32_offset_68_live_scc
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: default, size: 68, alignment: 16, stack-id: default }
+  - { id: 1, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    liveins: $sgpr4, $sgpr5
+
+    ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc
+    ; GCN: liveins: $sgpr4, $sgpr5
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 68, killed $vgpr0, implicit $exec
+    ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+  S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+  renamable $sgpr4 = S_MOV_B32 %stack.1
+  S_ENDPGM 0, implicit $sgpr4, implicit $scc
+
+...
+
+# FIXME: This is finding a VGPR
+---
+name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: default, size: 4, alignment: 16, stack-id: default }
+machineFunctionInfo:
+  scratchRSrcReg:  $sgpr0_sgpr1_sgpr2_sgpr3
+  stackPtrOffsetReg: '$sgpr32'
+  occupancy: 10
+
+body:             |
+  bb.0:
+    liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+
+    ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
+    ; GCN: liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GCN-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+    ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+    ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GCN-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GCN-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GCN-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GCN-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GCN-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GCN-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GCN-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GCN-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+    ; GCN-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+    ; GCN-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+    ; GCN-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+    ; GCN-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+    ; GCN-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+    ; GCN-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+    ; GCN-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+    ; GCN-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+    ; GCN-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+    ; GCN-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+    ; GCN-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+    ; GCN-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+    ; GCN-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+    S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+
+    renamable $sgpr4 = S_MOV_B32 %stack.0
+
+    S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    S_ENDPGM 0, implicit $sgpr4, implicit $scc
+
+...
+
+# FIXME: This is clobbering scc
+---
+name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: default, size: 64, alignment: 16, stack-id: default }
+  - { id: 1, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+  scratchRSrcReg:  $sgpr0_sgpr1_sgpr2_sgpr3
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    liveins: $sgpr4, $sgpr5
+
+    ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
+    ; GCN: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+    ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec
+    ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GCN-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GCN-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GCN-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GCN-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GCN-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GCN-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GCN-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GCN-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+    ; GCN-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+    ; GCN-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+    ; GCN-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+    ; GCN-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+    ; GCN-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+    ; GCN-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+    ; GCN-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+    ; GCN-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+    ; GCN-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+    ; GCN-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+    ; GCN-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+    ; GCN-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+    ; GCN-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+    ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+    S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+
+    renamable $sgpr4 = S_MOV_B32 %stack.1
+
+    S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    S_ENDPGM 0, implicit $sgpr4, implicit $scc
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir b/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
new file mode 100644
index 0000000000000..74e8b3b47fdb9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
@@ -0,0 +1,176 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck %s
+--- |
+  target triple = "amdgcn-amd-amdhsa"
+
+  declare double @killer()
+  define void @bug() #0 {
+    ret void
+  }
+
+  attributes #0 = { "amdgpu-stack-objects" "target-cpu"="gfx90a" }
+
+...
+---
+name:            bug
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    8
+  adjustsStack:    true
+  hasCalls:        true
+stack:
+  - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: 0, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+  - { id: 1, name: '', type: default, offset: 4, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: 4, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+  - { id: 2, name: '', type: default, offset: 8, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: 8, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+  - { id: 3, name: '', type: default, offset: 12, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: 12, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+  - { id: 4, name: '', type: default, offset: 16, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: 16, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+  - { id: 5, name: '', type: default, offset: 20, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: 20, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+  - { id: 6, name: '', type: default, offset: 24, size: 8, alignment: 8,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: 24, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+  - { id: 7, name: '', type: spill-slot, offset: 32, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 8, name: '', type: spill-slot, offset: 36, size: 8, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 9, name: '', type: spill-slot, offset: 44, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 10, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
+      stack-id: sgpr-spill, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 11, name: '', type: spill-slot, offset: 48, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 12, name: '', type: default, offset: 52, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+machineFunctionInfo:
+  isEntryFunction: false
+  isChainFunction: false
+  memoryBound:     false
+  waveLimiter:     false
+  hasSpilledSGPRs: true
+  hasSpilledVGPRs: true
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+  bytesInStackArgArea: 0
+  returnsVoid:     true
+  argumentInfo:
+    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+    dispatchPtr:     { reg: '$sgpr4_sgpr5' }
+    queuePtr:        { reg: '$sgpr6_sgpr7' }
+    dispatchID:      { reg: '$sgpr10_sgpr11' }
+    workGroupIDX:    { reg: '$sgpr12' }
+    workGroupIDY:    { reg: '$sgpr13' }
+    workGroupIDZ:    { reg: '$sgpr14' }
+    LDSKernelId:     { reg: '$sgpr15' }
+    implicitArgPtr:  { reg: '$sgpr8_sgpr9' }
+    workItemIDX:     { reg: '$vgpr31', mask: 1023 }
+    workItemIDY:     { reg: '$vgpr31', mask: 1047552 }
+    workItemIDZ:     { reg: '$vgpr31', mask: 1072693248 }
+  psInputAddr:     0
+  psInputEnable:   0
+  mode:
+    ieee:            true
+    dx10-clamp:      true
+    fp32-input-denormals: true
+    fp32-output-denormals: true
+    fp64-fp16-input-denormals: true
+    fp64-fp16-output-denormals: true
+  highBitsOf32BitAddress: 0
+  occupancy:       8
+  wwmReservedRegs:
+    - '$vgpr63'
+    - '$vgpr40'
+  scavengeFI:      '%stack.12'
+  vgprForAGPRCopy: ''
+  sgprForEXECCopy: ''
+  longBranchReservedReg: ''
+body:             |
+  bb.0:
+    liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+
+    ; CHECK-LABEL: name: bug
+    ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr41, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $sgpr16 = COPY $sgpr33
+    ; CHECK-NEXT: $sgpr33 = frame-setup COPY $sgpr32
+    ; CHECK-NEXT: $sgpr18_sgpr19 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 56, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 60, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+    ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr18_sgpr19
+    ; CHECK-NEXT: $vgpr41 = SI_SPILL_S32_TO_VGPR $sgpr16, 0, undef $vgpr41
+    ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 5120, implicit-def dead $scc
+    ; CHECK-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
+    ; CHECK-NEXT: renamable $sgpr17 = S_MOV_B32 0
+    ; CHECK-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
+    ; CHECK-NEXT: $sgpr24 = S_LSHR_B32 $sgpr33, 6, implicit-def dead $scc
+    ; CHECK-NEXT: renamable $sgpr29 = COPY undef renamable $sgpr30
+    ; CHECK-NEXT: $sgpr20 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
+    ; CHECK-NEXT: $sgpr20 = S_ADD_I32 killed $sgpr20, 8, implicit-def $scc
+    ; CHECK-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
+    ; CHECK-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
+    ; CHECK-NEXT: $sgpr31 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
+    ; CHECK-NEXT: $sgpr31 = S_ADD_I32 killed $sgpr31, 24, implicit-def $scc
+    ; CHECK-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr26_sgpr27, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr24_sgpr25, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr22_sgpr23, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr20_sgpr21, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr18_sgpr19, implicit $exec
+    ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @killer, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+    ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr41, 0
+    ; CHECK-NEXT: $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    ; CHECK-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 56, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+    ; CHECK-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 60, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+    ; CHECK-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+    ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
+    ; CHECK-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -5120, implicit-def dead $scc
+    ; CHECK-NEXT: $sgpr33 = COPY $sgpr4
+    ; CHECK-NEXT: SI_RETURN
+    renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
+    renamable $sgpr17 = S_MOV_B32 0
+    undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
+    renamable $sgpr24 = S_MOV_B32 %stack.0
+    renamable $sgpr29 = COPY undef renamable $sgpr30
+    renamable $sgpr20 = S_MOV_B32 %stack.2
+    undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
+    undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
+    renamable $sgpr31 = S_MOV_B32 %stack.6
+    renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
+    renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
+    renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec
+    renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr26_sgpr27, implicit $exec
+    renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr24_sgpr25, implicit $exec
+    renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr22_sgpr23, implicit $exec
+    renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr20_sgpr21, implicit $exec
+    renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr18_sgpr19, implicit $exec
+    dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @killer, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+    SI_RETURN
+
+...

>From 7b80c8bd248342f6908ae063a263ed50af7b1020 Mon Sep 17 00:00:00 2001
From: PankajDwivedi-25 <pankajkumar.divedi at amd.com>
Date: Fri, 19 Jul 2024 14:02:37 +0530
Subject: [PATCH 2/8] refactored SIRegisterInfo, reduce-frame-index

---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     |   1 -
 .../CodeGen/AMDGPU/reduce-frame-index.mir     | 116 ++----------------
 2 files changed, 11 insertions(+), 106 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 05153383a3a21..b08558ae073a2 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2551,7 +2551,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
             } else {
               TmpResultReg = RS->scavengeRegisterBackwards(
                   AMDGPU::VGPR_32RegClass, MI, false, 0);
-
               BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHR_B32_e64),
                       TmpResultReg)
                   .addImm(ST.getWavefrontSizeLog2())
diff --git a/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir b/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
index 74e8b3b47fdb9..0661bf2778bb7 100644
--- a/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
@@ -14,127 +14,41 @@
 ---
 name:            bug
 tracksRegLiveness: true
-frameInfo:
-  maxAlignment:    8
-  adjustsStack:    true
-  hasCalls:        true
 stack:
   - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
       stack-id: default, callee-saved-register: '', callee-saved-restored: true,
       local-offset: 0, debug-info-variable: '', debug-info-expression: '',
       debug-info-location: '' }
-  - { id: 1, name: '', type: default, offset: 4, size: 4, alignment: 4,
-      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
-      local-offset: 4, debug-info-variable: '', debug-info-expression: '',
-      debug-info-location: '' }
-  - { id: 2, name: '', type: default, offset: 8, size: 4, alignment: 4,
+  - { id: 1, name: '', type: default, offset: 8, size: 4, alignment: 4,
       stack-id: default, callee-saved-register: '', callee-saved-restored: true,
       local-offset: 8, debug-info-variable: '', debug-info-expression: '',
       debug-info-location: '' }
-  - { id: 3, name: '', type: default, offset: 12, size: 4, alignment: 4,
-      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
-      local-offset: 12, debug-info-variable: '', debug-info-expression: '',
-      debug-info-location: '' }
-  - { id: 4, name: '', type: default, offset: 16, size: 4, alignment: 4,
-      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
-      local-offset: 16, debug-info-variable: '', debug-info-expression: '',
-      debug-info-location: '' }
-  - { id: 5, name: '', type: default, offset: 20, size: 4, alignment: 4,
-      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
-      local-offset: 20, debug-info-variable: '', debug-info-expression: '',
-      debug-info-location: '' }
-  - { id: 6, name: '', type: default, offset: 24, size: 8, alignment: 8,
+  - { id: 2, name: '', type: default, offset: 24, size: 8, alignment: 8,
       stack-id: default, callee-saved-register: '', callee-saved-restored: true,
       local-offset: 24, debug-info-variable: '', debug-info-expression: '',
       debug-info-location: '' }
-  - { id: 7, name: '', type: spill-slot, offset: 32, size: 4, alignment: 4,
-      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 8, name: '', type: spill-slot, offset: 36, size: 8, alignment: 4,
-      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 9, name: '', type: spill-slot, offset: 44, size: 4, alignment: 4,
-      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 10, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
-      stack-id: sgpr-spill, callee-saved-register: '', callee-saved-restored: true,
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 11, name: '', type: spill-slot, offset: 48, size: 4, alignment: 4,
-      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 12, name: '', type: default, offset: 52, size: 4, alignment: 4,
-      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+
 machineFunctionInfo:
-  isEntryFunction: false
-  isChainFunction: false
-  memoryBound:     false
-  waveLimiter:     false
-  hasSpilledSGPRs: true
-  hasSpilledVGPRs: true
   scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
   frameOffsetReg:  '$sgpr33'
   stackPtrOffsetReg: '$sgpr32'
-  bytesInStackArgArea: 0
-  returnsVoid:     true
-  argumentInfo:
-    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
-    dispatchPtr:     { reg: '$sgpr4_sgpr5' }
-    queuePtr:        { reg: '$sgpr6_sgpr7' }
-    dispatchID:      { reg: '$sgpr10_sgpr11' }
-    workGroupIDX:    { reg: '$sgpr12' }
-    workGroupIDY:    { reg: '$sgpr13' }
-    workGroupIDZ:    { reg: '$sgpr14' }
-    LDSKernelId:     { reg: '$sgpr15' }
-    implicitArgPtr:  { reg: '$sgpr8_sgpr9' }
-    workItemIDX:     { reg: '$vgpr31', mask: 1023 }
-    workItemIDY:     { reg: '$vgpr31', mask: 1047552 }
-    workItemIDZ:     { reg: '$vgpr31', mask: 1072693248 }
-  psInputAddr:     0
-  psInputEnable:   0
-  mode:
-    ieee:            true
-    dx10-clamp:      true
-    fp32-input-denormals: true
-    fp32-output-denormals: true
-    fp64-fp16-input-denormals: true
-    fp64-fp16-output-denormals: true
-  highBitsOf32BitAddress: 0
-  occupancy:       8
-  wwmReservedRegs:
-    - '$vgpr63'
-    - '$vgpr40'
-  scavengeFI:      '%stack.12'
-  vgprForAGPRCopy: ''
-  sgprForEXECCopy: ''
-  longBranchReservedReg: ''
 body:             |
   bb.0:
     liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-
     ; CHECK-LABEL: name: bug
-    ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr41, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+    ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: $sgpr16 = COPY $sgpr33
-    ; CHECK-NEXT: $sgpr33 = frame-setup COPY $sgpr32
-    ; CHECK-NEXT: $sgpr18_sgpr19 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 56, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 60, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
-    ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr18_sgpr19
-    ; CHECK-NEXT: $vgpr41 = SI_SPILL_S32_TO_VGPR $sgpr16, 0, undef $vgpr41
-    ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 5120, implicit-def dead $scc
     ; CHECK-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
     ; CHECK-NEXT: renamable $sgpr17 = S_MOV_B32 0
     ; CHECK-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
-    ; CHECK-NEXT: $sgpr24 = S_LSHR_B32 $sgpr33, 6, implicit-def dead $scc
+    ; CHECK-NEXT: $sgpr24 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
     ; CHECK-NEXT: renamable $sgpr29 = COPY undef renamable $sgpr30
-    ; CHECK-NEXT: $sgpr20 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
-    ; CHECK-NEXT: $sgpr20 = S_ADD_I32 killed $sgpr20, 8, implicit-def $scc
+    ; CHECK-NEXT: $sgpr20 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+    ; CHECK-NEXT: $sgpr20 = S_ADD_I32 killed $sgpr20, 4, implicit-def $scc
     ; CHECK-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
     ; CHECK-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
-    ; CHECK-NEXT: $sgpr31 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
-    ; CHECK-NEXT: $sgpr31 = S_ADD_I32 killed $sgpr31, 24, implicit-def $scc
+    ; CHECK-NEXT: $sgpr31 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+    ; CHECK-NEXT: $sgpr31 = S_ADD_I32 killed $sgpr31, 8, implicit-def $scc
     ; CHECK-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
     ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
     ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec
@@ -144,24 +58,16 @@ body:             |
     ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr20_sgpr21, implicit $exec
     ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr18_sgpr19, implicit $exec
     ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @killer, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
-    ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr41, 0
-    ; CHECK-NEXT: $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; CHECK-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 56, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
-    ; CHECK-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 60, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
-    ; CHECK-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
-    ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
-    ; CHECK-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -5120, implicit-def dead $scc
-    ; CHECK-NEXT: $sgpr33 = COPY $sgpr4
     ; CHECK-NEXT: SI_RETURN
     renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
     renamable $sgpr17 = S_MOV_B32 0
     undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
     renamable $sgpr24 = S_MOV_B32 %stack.0
     renamable $sgpr29 = COPY undef renamable $sgpr30
-    renamable $sgpr20 = S_MOV_B32 %stack.2
+    renamable $sgpr20 = S_MOV_B32 %stack.1
     undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
     undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
-    renamable $sgpr31 = S_MOV_B32 %stack.6
+    renamable $sgpr31 = S_MOV_B32 %stack.2
     renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
     renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
     renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec

>From 126bd3078ec68c9b1abcc99d0bb61048d89c9011 Mon Sep 17 00:00:00 2001
From: PankajDwivedi-25 <pankajkumar.divedi at amd.com>
Date: Fri, 19 Jul 2024 19:00:09 +0530
Subject: [PATCH 3/8] refactored,checks with gfx90a

---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     |   6 +-
 llvm/test/CodeGen/AMDGPU/frame-index.mir      | 147 +++++++++---------
 .../CodeGen/AMDGPU/reduce-frame-index.mir     |  26 +---
 3 files changed, 84 insertions(+), 95 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index b08558ae073a2..2f210f4dde7b5 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2536,7 +2536,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                 IsCopy && IsSALU
                     ? ResultReg
                     : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
-                                                    MI, false, 0, false);
+                                                    MI, false, 0, /*AllowSpill=*/false);
             Register ScaledReg =
                 TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
             Register TmpResultReg = ScaledReg;
@@ -2550,7 +2550,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                   .addImm(Offset);
             } else {
               TmpResultReg = RS->scavengeRegisterBackwards(
-                  AMDGPU::VGPR_32RegClass, MI, false, 0);
+                  AMDGPU::VGPR_32RegClass, MI, false, 0, /*AllowSpill=*/true);
               BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHR_B32_e64),
                       TmpResultReg)
                   .addImm(ST.getWavefrontSizeLog2())
@@ -2561,7 +2561,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
               Register NewDest =
                   IsCopy ? ResultReg
                          : RS->scavengeRegisterBackwards(
-                               AMDGPU::SReg_32RegClass, Add, false, 0);
+                               AMDGPU::SReg_32RegClass, Add, false, 0, /*AllowSpill=*/true);
               BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                       NewDest)
                   .addReg(TmpResultReg);
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir
index f4476a497f754..ccc14d03ea9a5 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck -check-prefix=GCN %s
 
 ---
 name: func_add_constant_to_fi_divergent_i32
@@ -359,24 +359,24 @@ body:             |
     liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
 
     ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
-    ; GCN: liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+    ; GCN: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+    ; GCN-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec
+    ; GCN-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec
+    ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec
+    ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr43, implicit $exec
+    ; GCN-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr44, implicit $exec
+    ; GCN-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr45, implicit $exec
+    ; GCN-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr46, implicit $exec
+    ; GCN-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr47, implicit $exec
+    ; GCN-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr56, implicit $exec
+    ; GCN-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr57, implicit $exec
+    ; GCN-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr58, implicit $exec
+    ; GCN-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr59, implicit $exec
+    ; GCN-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr60, implicit $exec
+    ; GCN-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr61, implicit $exec
+    ; GCN-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr62, implicit $exec
+    ; GCN-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -386,11 +386,10 @@ body:             |
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
-    ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
-    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+    ; GCN-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
     ; GCN-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GCN-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GCN-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -399,22 +398,22 @@ body:             |
     ; GCN-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GCN-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GCN-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GCN-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
-    ; GCN-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
-    ; GCN-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
-    ; GCN-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
-    ; GCN-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
-    ; GCN-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
-    ; GCN-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
-    ; GCN-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
-    ; GCN-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
-    ; GCN-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
-    ; GCN-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
-    ; GCN-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
-    ; GCN-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GCN-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-    ; GCN-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
-    ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec
+    ; GCN-NEXT: $vgpr62 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec
+    ; GCN-NEXT: $vgpr61 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec
+    ; GCN-NEXT: $vgpr60 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec
+    ; GCN-NEXT: $vgpr59 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec
+    ; GCN-NEXT: $vgpr58 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec
+    ; GCN-NEXT: $vgpr57 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec
+    ; GCN-NEXT: $vgpr56 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec
+    ; GCN-NEXT: $vgpr47 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec
+    ; GCN-NEXT: $vgpr46 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec
+    ; GCN-NEXT: $vgpr45 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec
+    ; GCN-NEXT: $vgpr44 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
+    ; GCN-NEXT: $vgpr43 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
+    ; GCN-NEXT: $vgpr42 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
+    ; GCN-NEXT: $vgpr41 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
+    ; GCN-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
     S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
@@ -455,24 +454,24 @@ body:             |
     liveins: $sgpr4, $sgpr5
 
     ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
-    ; GCN: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+    ; GCN: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+    ; GCN-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec
+    ; GCN-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec
+    ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec
+    ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr43, implicit $exec
+    ; GCN-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr44, implicit $exec
+    ; GCN-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr45, implicit $exec
+    ; GCN-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr46, implicit $exec
+    ; GCN-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr47, implicit $exec
+    ; GCN-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr56, implicit $exec
+    ; GCN-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr57, implicit $exec
+    ; GCN-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr58, implicit $exec
+    ; GCN-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr59, implicit $exec
+    ; GCN-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr60, implicit $exec
+    ; GCN-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr61, implicit $exec
+    ; GCN-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr62, implicit $exec
+    ; GCN-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -482,11 +481,11 @@ body:             |
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
     ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
-    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
     ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
     ; GCN-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GCN-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GCN-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -495,22 +494,22 @@ body:             |
     ; GCN-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GCN-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GCN-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GCN-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
-    ; GCN-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
-    ; GCN-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
-    ; GCN-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
-    ; GCN-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
-    ; GCN-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
-    ; GCN-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
-    ; GCN-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
-    ; GCN-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
-    ; GCN-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
-    ; GCN-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
-    ; GCN-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
-    ; GCN-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
-    ; GCN-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GCN-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-    ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GCN-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec
+    ; GCN-NEXT: $vgpr62 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec
+    ; GCN-NEXT: $vgpr61 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec
+    ; GCN-NEXT: $vgpr60 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec
+    ; GCN-NEXT: $vgpr59 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec
+    ; GCN-NEXT: $vgpr58 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec
+    ; GCN-NEXT: $vgpr57 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec
+    ; GCN-NEXT: $vgpr56 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec
+    ; GCN-NEXT: $vgpr47 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec
+    ; GCN-NEXT: $vgpr46 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec
+    ; GCN-NEXT: $vgpr45 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec
+    ; GCN-NEXT: $vgpr44 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
+    ; GCN-NEXT: $vgpr43 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
+    ; GCN-NEXT: $vgpr42 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
+    ; GCN-NEXT: $vgpr41 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
+    ; GCN-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
     S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
diff --git a/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir b/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
index 0661bf2778bb7..a68455cb24ffe 100644
--- a/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
@@ -1,18 +1,8 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck %s
---- |
-  target triple = "amdgcn-amd-amdhsa"
 
-  declare double @killer()
-  define void @bug() #0 {
-    ret void
-  }
-
-  attributes #0 = { "amdgpu-stack-objects" "target-cpu"="gfx90a" }
-
-...
 ---
-name:            bug
+name: s_copy_frame_index_elimination_failure_pei
 tracksRegLiveness: true
 stack:
   - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
@@ -35,19 +25,19 @@ machineFunctionInfo:
 body:             |
   bb.0:
     liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-    ; CHECK-LABEL: name: bug
+    ; CHECK-LABEL: name: s_copy_frame_index_elimination_failure_pei
     ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
     ; CHECK-NEXT: renamable $sgpr17 = S_MOV_B32 0
-    ; CHECK-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
-    ; CHECK-NEXT: $sgpr24 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+    ; CHECK-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc_lo
+    ; CHECK-NEXT: $sgpr24 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
     ; CHECK-NEXT: renamable $sgpr29 = COPY undef renamable $sgpr30
-    ; CHECK-NEXT: $sgpr20 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+    ; CHECK-NEXT: $sgpr20 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
     ; CHECK-NEXT: $sgpr20 = S_ADD_I32 killed $sgpr20, 4, implicit-def $scc
     ; CHECK-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
     ; CHECK-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
-    ; CHECK-NEXT: $sgpr31 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+    ; CHECK-NEXT: $sgpr31 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
     ; CHECK-NEXT: $sgpr31 = S_ADD_I32 killed $sgpr31, 8, implicit-def $scc
     ; CHECK-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
     ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
@@ -57,7 +47,7 @@ body:             |
     ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr22_sgpr23, implicit $exec
     ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr20_sgpr21, implicit $exec
     ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr18_sgpr19, implicit $exec
-    ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @killer, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+    ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
     ; CHECK-NEXT: SI_RETURN
     renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
     renamable $sgpr17 = S_MOV_B32 0
@@ -76,7 +66,7 @@ body:             |
     renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr22_sgpr23, implicit $exec
     renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr20_sgpr21, implicit $exec
     renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr18_sgpr19, implicit $exec
-    dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @killer, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+    dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
     SI_RETURN
 
 ...

>From eb1e3031c994e4d65e8b01ba436ff6b55c475388 Mon Sep 17 00:00:00 2001
From: PankajDwivedi-25 <pankajkumar.divedi at amd.com>
Date: Fri, 19 Jul 2024 19:04:47 +0530
Subject: [PATCH 4/8] refactored,checks with gfx803

---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp |  18 +--
 llvm/test/CodeGen/AMDGPU/frame-index.mir  | 147 +++++++++++-----------
 2 files changed, 83 insertions(+), 82 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 2f210f4dde7b5..c94595f3ddd20 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2532,11 +2532,11 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
 
             // We may have 1 free scratch SGPR even though a carry out is
             // unavailable. Only one additional mov is needed.
-            Register TmpScaledReg =
-                IsCopy && IsSALU
-                    ? ResultReg
-                    : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
-                                                    MI, false, 0, /*AllowSpill=*/false);
+            Register TmpScaledReg = IsCopy && IsSALU
+                                        ? ResultReg
+                                        : RS->scavengeRegisterBackwards(
+                                              AMDGPU::SReg_32_XM0RegClass, MI,
+                                              false, 0, /*AllowSpill=*/false);
             Register ScaledReg =
                 TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
             Register TmpResultReg = ScaledReg;
@@ -2558,10 +2558,10 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
               auto Add = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e32),
                                  TmpResultReg);
               Add.addImm(Offset).addReg(TmpResultReg, RegState::Kill);
-              Register NewDest =
-                  IsCopy ? ResultReg
-                         : RS->scavengeRegisterBackwards(
-                               AMDGPU::SReg_32RegClass, Add, false, 0, /*AllowSpill=*/true);
+              Register NewDest = IsCopy ? ResultReg
+                                        : RS->scavengeRegisterBackwards(
+                                              AMDGPU::SReg_32RegClass, Add,
+                                              false, 0, /*AllowSpill=*/true);
               BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                       NewDest)
                   .addReg(TmpResultReg);
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir
index ccc14d03ea9a5..a0301ed267eed 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck -check-prefix=GCN %s
 
 ---
 name: func_add_constant_to_fi_divergent_i32
@@ -359,24 +359,24 @@ body:             |
     liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
 
     ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
-    ; GCN: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GCN: liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec
-    ; GCN-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec
-    ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec
-    ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr43, implicit $exec
-    ; GCN-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr44, implicit $exec
-    ; GCN-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr45, implicit $exec
-    ; GCN-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr46, implicit $exec
-    ; GCN-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr47, implicit $exec
-    ; GCN-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr56, implicit $exec
-    ; GCN-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr57, implicit $exec
-    ; GCN-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr58, implicit $exec
-    ; GCN-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr59, implicit $exec
-    ; GCN-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr60, implicit $exec
-    ; GCN-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr61, implicit $exec
-    ; GCN-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr62, implicit $exec
-    ; GCN-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -386,10 +386,11 @@ body:             |
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
-    ; GCN-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+    ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
     ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
     ; GCN-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GCN-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GCN-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -398,22 +399,22 @@ body:             |
     ; GCN-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GCN-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GCN-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GCN-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec
-    ; GCN-NEXT: $vgpr62 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec
-    ; GCN-NEXT: $vgpr61 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec
-    ; GCN-NEXT: $vgpr60 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec
-    ; GCN-NEXT: $vgpr59 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec
-    ; GCN-NEXT: $vgpr58 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec
-    ; GCN-NEXT: $vgpr57 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec
-    ; GCN-NEXT: $vgpr56 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec
-    ; GCN-NEXT: $vgpr47 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec
-    ; GCN-NEXT: $vgpr46 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec
-    ; GCN-NEXT: $vgpr45 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec
-    ; GCN-NEXT: $vgpr44 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
-    ; GCN-NEXT: $vgpr43 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
-    ; GCN-NEXT: $vgpr42 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
-    ; GCN-NEXT: $vgpr41 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
-    ; GCN-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
+    ; GCN-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+    ; GCN-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+    ; GCN-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+    ; GCN-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+    ; GCN-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+    ; GCN-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+    ; GCN-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+    ; GCN-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+    ; GCN-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+    ; GCN-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+    ; GCN-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+    ; GCN-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+    ; GCN-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+    ; GCN-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
     S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
@@ -454,24 +455,24 @@ body:             |
     liveins: $sgpr4, $sgpr5
 
     ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
-    ; GCN: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+    ; GCN: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec
-    ; GCN-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec
-    ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec
-    ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr43, implicit $exec
-    ; GCN-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr44, implicit $exec
-    ; GCN-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr45, implicit $exec
-    ; GCN-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr46, implicit $exec
-    ; GCN-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr47, implicit $exec
-    ; GCN-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr56, implicit $exec
-    ; GCN-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr57, implicit $exec
-    ; GCN-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr58, implicit $exec
-    ; GCN-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr59, implicit $exec
-    ; GCN-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr60, implicit $exec
-    ; GCN-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr61, implicit $exec
-    ; GCN-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr62, implicit $exec
-    ; GCN-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -481,11 +482,11 @@ body:             |
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
     ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
-    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec
     ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
     ; GCN-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GCN-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GCN-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -494,22 +495,22 @@ body:             |
     ; GCN-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GCN-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GCN-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GCN-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec
-    ; GCN-NEXT: $vgpr62 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec
-    ; GCN-NEXT: $vgpr61 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec
-    ; GCN-NEXT: $vgpr60 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec
-    ; GCN-NEXT: $vgpr59 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec
-    ; GCN-NEXT: $vgpr58 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec
-    ; GCN-NEXT: $vgpr57 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec
-    ; GCN-NEXT: $vgpr56 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec
-    ; GCN-NEXT: $vgpr47 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec
-    ; GCN-NEXT: $vgpr46 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec
-    ; GCN-NEXT: $vgpr45 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec
-    ; GCN-NEXT: $vgpr44 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
-    ; GCN-NEXT: $vgpr43 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
-    ; GCN-NEXT: $vgpr42 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
-    ; GCN-NEXT: $vgpr41 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
-    ; GCN-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
+    ; GCN-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+    ; GCN-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+    ; GCN-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+    ; GCN-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+    ; GCN-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+    ; GCN-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+    ; GCN-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+    ; GCN-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+    ; GCN-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+    ; GCN-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+    ; GCN-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+    ; GCN-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+    ; GCN-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+    ; GCN-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+    ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
     S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15

>From aa340100ec3d8022d1422da9f394dab5c0959451 Mon Sep 17 00:00:00 2001
From: PankajDwivedi-25 <pankajkumar.divedi at amd.com>
Date: Fri, 19 Jul 2024 19:53:21 +0530
Subject: [PATCH 5/8] [WIP] updated check for gfx8,gfx9

---
 llvm/test/CodeGen/AMDGPU/frame-index.mir      | 471 +++++++++++++-----
 .../CodeGen/AMDGPU/reduce-frame-index.mir     |   8 +-
 2 files changed, 358 insertions(+), 121 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir
index a0301ed267eed..03b972fb2b136 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir
@@ -1,5 +1,7 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=GFX8,GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=GFX900,GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=GFX90A,GCN %s
 
 ---
 name: func_add_constant_to_fi_divergent_i32
@@ -358,64 +360,181 @@ body:             |
   bb.0:
     liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
 
-    ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
-    ; GCN: liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
-    ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
-    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
-    ; GCN-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
-    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
-    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
-    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
-    ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
-    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
-    ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; GCN-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GCN-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
-    ; GCN-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
-    ; GCN-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
-    ; GCN-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
-    ; GCN-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
-    ; GCN-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GCN-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
-    ; GCN-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
-    ; GCN-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
-    ; GCN-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
-    ; GCN-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
-    ; GCN-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
-    ; GCN-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
-    ; GCN-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
-    ; GCN-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
-    ; GCN-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
-    ; GCN-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
-    ; GCN-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
-    ; GCN-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GCN-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-    ; GCN-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
-    ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+    ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
+    ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+    ; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX8-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+    ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GFX8-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+    ; GFX8-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+    ; GFX8-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+    ; GFX8-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+    ; GFX8-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+    ; GFX8-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+    ; GFX8-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+    ; GFX8-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GFX8-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+    ; GFX8-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+    ; GFX8-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+    ; GFX8-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+    ; GFX8-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+    ; GFX8-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+    ; GFX8-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GFX8-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+    ;
+    ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
+    ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+    ; GFX900-NEXT: {{  $}}
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+    ; GFX900-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+    ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GFX900-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GFX900-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+    ; GFX900-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+    ; GFX900-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+    ; GFX900-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+    ; GFX900-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+    ; GFX900-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+    ; GFX900-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+    ; GFX900-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GFX900-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+    ; GFX900-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+    ; GFX900-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+    ; GFX900-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+    ; GFX900-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+    ; GFX900-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+    ; GFX900-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GFX900-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+    ;
+    ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
+    ; GFX90A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GFX90A-NEXT: {{  $}}
+    ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec
+    ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec
+    ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec
+    ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr43, implicit $exec
+    ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr44, implicit $exec
+    ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr45, implicit $exec
+    ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr46, implicit $exec
+    ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr47, implicit $exec
+    ; GFX90A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr56, implicit $exec
+    ; GFX90A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr57, implicit $exec
+    ; GFX90A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr58, implicit $exec
+    ; GFX90A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr59, implicit $exec
+    ; GFX90A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr60, implicit $exec
+    ; GFX90A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr61, implicit $exec
+    ; GFX90A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr62, implicit $exec
+    ; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+    ; GFX90A-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GFX90A-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec
+    ; GFX90A-NEXT: $vgpr62 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec
+    ; GFX90A-NEXT: $vgpr61 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec
+    ; GFX90A-NEXT: $vgpr60 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec
+    ; GFX90A-NEXT: $vgpr59 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec
+    ; GFX90A-NEXT: $vgpr58 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec
+    ; GFX90A-NEXT: $vgpr57 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec
+    ; GFX90A-NEXT: $vgpr56 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec
+    ; GFX90A-NEXT: $vgpr47 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec
+    ; GFX90A-NEXT: $vgpr46 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec
+    ; GFX90A-NEXT: $vgpr45 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec
+    ; GFX90A-NEXT: $vgpr44 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
+    ; GFX90A-NEXT: $vgpr43 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
+    ; GFX90A-NEXT: $vgpr42 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
+    ; GFX90A-NEXT: $vgpr41 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
+    ; GFX90A-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
+    ; GFX90A-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
     S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -454,64 +573,182 @@ body:             |
   bb.0:
     liveins: $sgpr4, $sgpr5
 
-    ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
-    ; GCN: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
-    ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
-    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
-    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
-    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
-    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
-    ; GCN-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
-    ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
-    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec
-    ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
-    ; GCN-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; GCN-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GCN-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
-    ; GCN-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
-    ; GCN-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
-    ; GCN-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
-    ; GCN-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
-    ; GCN-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GCN-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
-    ; GCN-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
-    ; GCN-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
-    ; GCN-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
-    ; GCN-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
-    ; GCN-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
-    ; GCN-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
-    ; GCN-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
-    ; GCN-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
-    ; GCN-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
-    ; GCN-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
-    ; GCN-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
-    ; GCN-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
-    ; GCN-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GCN-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-    ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
-    ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+    ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
+    ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+    ; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX8-NEXT: $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec
+    ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GFX8-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+    ; GFX8-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+    ; GFX8-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+    ; GFX8-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+    ; GFX8-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+    ; GFX8-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+    ; GFX8-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+    ; GFX8-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+    ; GFX8-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GFX8-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+    ; GFX8-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+    ; GFX8-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+    ; GFX8-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+    ; GFX8-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+    ; GFX8-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+    ; GFX8-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+    ;
+    ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
+    ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+    ; GFX900-NEXT: {{  $}}
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+    ; GFX900-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec
+    ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GFX900-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GFX900-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+    ; GFX900-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+    ; GFX900-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+    ; GFX900-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+    ; GFX900-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+    ; GFX900-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+    ; GFX900-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+    ; GFX900-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+    ; GFX900-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GFX900-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+    ; GFX900-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+    ; GFX900-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+    ; GFX900-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+    ; GFX900-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+    ; GFX900-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+    ; GFX900-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+    ;
+    ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
+    ; GFX90A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+    ; GFX90A-NEXT: {{  $}}
+    ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec
+    ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec
+    ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec
+    ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr43, implicit $exec
+    ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr44, implicit $exec
+    ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr45, implicit $exec
+    ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr46, implicit $exec
+    ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr47, implicit $exec
+    ; GFX90A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr56, implicit $exec
+    ; GFX90A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr57, implicit $exec
+    ; GFX90A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr58, implicit $exec
+    ; GFX90A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr59, implicit $exec
+    ; GFX90A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr60, implicit $exec
+    ; GFX90A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr61, implicit $exec
+    ; GFX90A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr62, implicit $exec
+    ; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+    ; GFX90A-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX90A-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+    ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GFX90A-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec
+    ; GFX90A-NEXT: $vgpr62 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec
+    ; GFX90A-NEXT: $vgpr61 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec
+    ; GFX90A-NEXT: $vgpr60 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec
+    ; GFX90A-NEXT: $vgpr59 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec
+    ; GFX90A-NEXT: $vgpr58 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec
+    ; GFX90A-NEXT: $vgpr57 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec
+    ; GFX90A-NEXT: $vgpr56 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec
+    ; GFX90A-NEXT: $vgpr47 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec
+    ; GFX90A-NEXT: $vgpr46 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec
+    ; GFX90A-NEXT: $vgpr45 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec
+    ; GFX90A-NEXT: $vgpr44 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
+    ; GFX90A-NEXT: $vgpr43 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
+    ; GFX90A-NEXT: $vgpr42 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
+    ; GFX90A-NEXT: $vgpr41 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
+    ; GFX90A-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
+    ; GFX90A-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
     S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
diff --git a/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir b/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
index a68455cb24ffe..6ae876fd21811 100644
--- a/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
@@ -30,14 +30,14 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
     ; CHECK-NEXT: renamable $sgpr17 = S_MOV_B32 0
-    ; CHECK-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc_lo
-    ; CHECK-NEXT: $sgpr24 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+    ; CHECK-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
+    ; CHECK-NEXT: $sgpr24 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
     ; CHECK-NEXT: renamable $sgpr29 = COPY undef renamable $sgpr30
-    ; CHECK-NEXT: $sgpr20 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
+    ; CHECK-NEXT: $sgpr20 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
     ; CHECK-NEXT: $sgpr20 = S_ADD_I32 killed $sgpr20, 4, implicit-def $scc
     ; CHECK-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
     ; CHECK-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
-    ; CHECK-NEXT: $sgpr31 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
+    ; CHECK-NEXT: $sgpr31 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
     ; CHECK-NEXT: $sgpr31 = S_ADD_I32 killed $sgpr31, 8, implicit-def $scc
     ; CHECK-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
     ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec

>From ab19c1ecbf4a5efe50cb92f1d5b6442d4186d79b Mon Sep 17 00:00:00 2001
From: PankajDwivedi-25 <pankajkumar.divedi at amd.com>
Date: Wed, 24 Jul 2024 15:44:02 +0530
Subject: [PATCH 6/8] [AMDGPU] this patch fixes all cases

---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     |  29 +-
 llvm/test/CodeGen/AMDGPU/frame-index.mir      |  76 ++-
 .../CodeGen/AMDGPU/reduce-frame-index.mir     | 554 +++++++++++++++++-
 3 files changed, 611 insertions(+), 48 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index c94595f3ddd20..27485be56b8de 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2555,12 +2555,33 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                       TmpResultReg)
                   .addImm(ST.getWavefrontSizeLog2())
                   .addReg(FrameReg);
-              auto Add = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e32),
-                                 TmpResultReg);
-              Add.addImm(Offset).addReg(TmpResultReg, RegState::Kill);
+
+              MachineInstrBuilder Add;
+              if ((Add = TII->getAddNoCarry(*MBB, MI, DL, TmpResultReg, *RS)) ==
+                  nullptr) {
+                // VCC is live and no SGPR is free.
+                // since emergency stack slot is already used for spilling VGPR
+                // scavenged? This a way around to avoid carry, need follow-up.
+                BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ResultReg)
+                    .addImm(Offset);
+                Add = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MAD_I32_I24_e64),
+                              TmpResultReg)
+                          .addReg(TmpResultReg, RegState::Kill)
+                          .addImm(1)
+                          .addReg(ResultReg, RegState::Kill)
+                          .addImm(0);
+              } else if (Add->getOpcode() == AMDGPU::V_ADD_CO_U32_e64) {
+                BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::S_MOV_B32), ResultReg)
+                    .addImm(Offset);
+                Add.addReg(ResultReg, RegState::Kill)
+                    .addReg(TmpResultReg, RegState::Kill)
+                    .addImm(0);
+              } else
+                Add.addImm(Offset).addReg(TmpResultReg, RegState::Kill);
+
               Register NewDest = IsCopy ? ResultReg
                                         : RS->scavengeRegisterBackwards(
-                                              AMDGPU::SReg_32RegClass, Add,
+                                              AMDGPU::SReg_32RegClass, *Add,
                                               false, 0, /*AllowSpill=*/true);
               BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                       NewDest)
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir
index 03b972fb2b136..a41eb26db899f 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir
@@ -305,14 +305,33 @@ body:             |
   bb.0:
     liveins: $sgpr4, $sgpr5
 
-    ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc
-    ; GCN: liveins: $sgpr4, $sgpr5
-    ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
-    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
-    ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-    ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+    ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc
+    ; GFX8: liveins: $sgpr4, $sgpr5
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX8-NEXT: $sgpr4 = S_MOV_B32 64
+    ; GFX8-NEXT: $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
+    ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+    ;
+    ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc
+    ; GFX900: liveins: $sgpr4, $sgpr5
+    ; GFX900-NEXT: {{  $}}
+    ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX900-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+    ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+    ;
+    ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc
+    ; GFX90A: liveins: $sgpr4, $sgpr5
+    ; GFX90A-NEXT: {{  $}}
+    ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX90A-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX90A-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+    ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GFX90A-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
   S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
   renamable $sgpr4 = S_MOV_B32 %stack.1
   S_ENDPGM 0, implicit $sgpr4, implicit $scc
@@ -331,14 +350,33 @@ body:             |
   bb.0:
     liveins: $sgpr4, $sgpr5
 
-    ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc
-    ; GCN: liveins: $sgpr4, $sgpr5
-    ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
-    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 68, killed $vgpr0, implicit $exec
-    ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-    ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+    ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc
+    ; GFX8: liveins: $sgpr4, $sgpr5
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX8-NEXT: $sgpr4 = S_MOV_B32 68
+    ; GFX8-NEXT: $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
+    ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+    ;
+    ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc
+    ; GFX900: liveins: $sgpr4, $sgpr5
+    ; GFX900-NEXT: {{  $}}
+    ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX900-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 68, killed $vgpr0, implicit $exec
+    ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+    ;
+    ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc
+    ; GFX90A: liveins: $sgpr4, $sgpr5
+    ; GFX90A-NEXT: {{  $}}
+    ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX90A-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX90A-NEXT: $vgpr0 = V_ADD_U32_e32 68, killed $vgpr0, implicit $exec
+    ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GFX90A-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
   S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
   renamable $sgpr4 = S_MOV_B32 %stack.1
   S_ENDPGM 0, implicit $sgpr4, implicit $scc
@@ -390,7 +428,8 @@ body:             |
     ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
     ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
     ; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
-    ; GFX8-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+    ; GFX8-NEXT: $sgpr4 = S_MOV_B32 64
+    ; GFX8-NEXT: $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
     ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
     ; GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
@@ -603,7 +642,8 @@ body:             |
     ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
     ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
     ; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
-    ; GFX8-NEXT: $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec
+    ; GFX8-NEXT: $sgpr4 = S_MOV_B32 128
+    ; GFX8-NEXT: $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
     ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
     ; GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
diff --git a/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir b/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
index 6ae876fd21811..4f08011d7b009 100644
--- a/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
@@ -1,5 +1,7 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck %s
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=GFX8,GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=GFX900,GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=GFX90A,GCN %s
 
 ---
 name: s_copy_frame_index_elimination_failure_pei
@@ -25,30 +27,30 @@ machineFunctionInfo:
 body:             |
   bb.0:
     liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-    ; CHECK-LABEL: name: s_copy_frame_index_elimination_failure_pei
-    ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
-    ; CHECK-NEXT: renamable $sgpr17 = S_MOV_B32 0
-    ; CHECK-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
-    ; CHECK-NEXT: $sgpr24 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
-    ; CHECK-NEXT: renamable $sgpr29 = COPY undef renamable $sgpr30
-    ; CHECK-NEXT: $sgpr20 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
-    ; CHECK-NEXT: $sgpr20 = S_ADD_I32 killed $sgpr20, 4, implicit-def $scc
-    ; CHECK-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
-    ; CHECK-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
-    ; CHECK-NEXT: $sgpr31 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
-    ; CHECK-NEXT: $sgpr31 = S_ADD_I32 killed $sgpr31, 8, implicit-def $scc
-    ; CHECK-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
-    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
-    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec
-    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr26_sgpr27, implicit $exec
-    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr24_sgpr25, implicit $exec
-    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr22_sgpr23, implicit $exec
-    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr20_sgpr21, implicit $exec
-    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr18_sgpr19, implicit $exec
-    ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
-    ; CHECK-NEXT: SI_RETURN
+    ; GCN-LABEL: name: s_copy_frame_index_elimination_failure_pei
+    ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
+    ; GCN-NEXT: renamable $sgpr17 = S_MOV_B32 0
+    ; GCN-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
+    ; GCN-NEXT: $sgpr24 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+    ; GCN-NEXT: renamable $sgpr29 = COPY undef renamable $sgpr30
+    ; GCN-NEXT: $sgpr20 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+    ; GCN-NEXT: $sgpr20 = S_ADD_I32 killed $sgpr20, 4, implicit-def $scc
+    ; GCN-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
+    ; GCN-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
+    ; GCN-NEXT: $sgpr31 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+    ; GCN-NEXT: $sgpr31 = S_ADD_I32 killed $sgpr31, 8, implicit-def $scc
+    ; GCN-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr26_sgpr27, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr24_sgpr25, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr22_sgpr23, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr20_sgpr21, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr18_sgpr19, implicit $exec
+    ; GCN-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+    ; GCN-NEXT: SI_RETURN
     renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
     renamable $sgpr17 = S_MOV_B32 0
     undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
@@ -70,3 +72,503 @@ body:             |
     SI_RETURN
 
 ...
+
+---
+name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: default, size: 64, alignment: 16, stack-id: default }
+  - { id: 1, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+  stackPtrOffsetReg: '$sgpr32'
+body:   |
+  bb.0:
+    liveins: $sgpr4, $sgpr5, $vgpr0
+
+    ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc
+    ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr0
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX8-NEXT: $sgpr4 = S_MOV_B32 64
+    ; GFX8-NEXT: $vgpr0, dead $sgpr0_sgpr1 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
+    ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+    ;
+    ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc
+    ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr0
+    ; GFX900-NEXT: {{  $}}
+    ; GFX900-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX900-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+    ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+    ;
+    ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc
+    ; GFX90A: liveins: $sgpr4, $sgpr5, $vgpr0
+    ; GFX90A-NEXT: {{  $}}
+    ; GFX90A-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX90A-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX90A-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+    ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GFX90A-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+  V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+  S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+  renamable $sgpr4 = S_MOV_B32 %stack.1
+  S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+
+...
+
+
+---
+name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: default, size: 64, alignment: 16, stack-id: default }
+  - { id: 1, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+  scratchRSrcReg:  $sgpr0_sgpr1_sgpr2_sgpr3
+  stackPtrOffsetReg: '$sgpr32'
+body:   |
+  bb.0:
+    liveins: $sgpr4, $sgpr5, $vgpr0
+
+
+    ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr
+    ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GFX8-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+    ; GFX8-NEXT: $vgpr1 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX8-NEXT: $sgpr4 = S_MOV_B32 128
+    ; GFX8-NEXT: $vgpr1, dead $sgpr6_sgpr7 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr1, 0, implicit $exec
+    ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
+    ; GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GFX8-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GFX8-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+    ; GFX8-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+    ; GFX8-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+    ; GFX8-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+    ; GFX8-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+    ; GFX8-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+    ; GFX8-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+    ; GFX8-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+    ; GFX8-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GFX8-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+    ; GFX8-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+    ; GFX8-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+    ; GFX8-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+    ; GFX8-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+    ; GFX8-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+    ; GFX8-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+    ;
+    ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr
+    ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+    ; GFX900-NEXT: {{  $}}
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GFX900-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+    ; GFX900-NEXT: $vgpr1 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+    ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
+    ; GFX900-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GFX900-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GFX900-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+    ; GFX900-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+    ; GFX900-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+    ; GFX900-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+    ; GFX900-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+    ; GFX900-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+    ; GFX900-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+    ; GFX900-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+    ; GFX900-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GFX900-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+    ; GFX900-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+    ; GFX900-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+    ; GFX900-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+    ; GFX900-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+    ; GFX900-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+    ; GFX900-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+    ;
+    ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr
+    ; GFX90A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+    ; GFX90A-NEXT: {{  $}}
+    ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec
+    ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec
+    ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec
+    ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr43, implicit $exec
+    ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr44, implicit $exec
+    ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr45, implicit $exec
+    ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr46, implicit $exec
+    ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr47, implicit $exec
+    ; GFX90A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr56, implicit $exec
+    ; GFX90A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr57, implicit $exec
+    ; GFX90A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr58, implicit $exec
+    ; GFX90A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr59, implicit $exec
+    ; GFX90A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr60, implicit $exec
+    ; GFX90A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr61, implicit $exec
+    ; GFX90A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr62, implicit $exec
+    ; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GFX90A-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+    ; GFX90A-NEXT: $vgpr1 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 64, killed $vgpr1, implicit $exec
+    ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
+    ; GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+    ; GFX90A-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+    ; GFX90A-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec
+    ; GFX90A-NEXT: $vgpr62 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec
+    ; GFX90A-NEXT: $vgpr61 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec
+    ; GFX90A-NEXT: $vgpr60 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec
+    ; GFX90A-NEXT: $vgpr59 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec
+    ; GFX90A-NEXT: $vgpr58 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec
+    ; GFX90A-NEXT: $vgpr57 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec
+    ; GFX90A-NEXT: $vgpr56 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec
+    ; GFX90A-NEXT: $vgpr47 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec
+    ; GFX90A-NEXT: $vgpr46 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec
+    ; GFX90A-NEXT: $vgpr45 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec
+    ; GFX90A-NEXT: $vgpr44 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
+    ; GFX90A-NEXT: $vgpr43 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
+    ; GFX90A-NEXT: $vgpr42 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
+    ; GFX90A-NEXT: $vgpr41 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
+    ; GFX90A-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
+    ; GFX90A-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+  S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+  S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+  S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+  S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+  S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+  S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+  S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+  S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+
+  V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+  S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+  renamable $sgpr4 = S_MOV_B32 %stack.1
+
+  S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+  S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+  S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+  S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+  S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+  S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+  S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+  S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+  S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+...
+
+
+---
+name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_sgpr
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: default, size: 64, alignment: 16, stack-id: default }
+  - { id: 1, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+  stackPtrOffsetReg: '$sgpr32'
+body:   |
+  bb.0:
+    liveins: $sgpr4, $sgpr5, $vgpr0
+
+
+    ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_sgpr
+    ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr0
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+    ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+    ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+    ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+    ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+    ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+    ; GFX8-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX8-NEXT: $sgpr4 = S_MOV_B32 64
+    ; GFX8-NEXT: $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr0, 1, killed $sgpr4, 0, implicit $exec
+    ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GFX8-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX8-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX8-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+    ; GFX8-NEXT: S_NOP 0, implicit $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+    ; GFX8-NEXT: S_NOP 0, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+    ; GFX8-NEXT: S_NOP 0, implicit $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+    ; GFX8-NEXT: S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+    ; GFX8-NEXT: S_NOP 0, implicit $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+    ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+    ;
+    ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_sgpr
+    ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr0
+    ; GFX900-NEXT: {{  $}}
+    ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+    ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+    ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+    ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+    ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+    ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+    ; GFX900-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX900-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+    ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GFX900-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX900-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX900-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+    ; GFX900-NEXT: S_NOP 0, implicit $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+    ; GFX900-NEXT: S_NOP 0, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+    ; GFX900-NEXT: S_NOP 0, implicit $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+    ; GFX900-NEXT: S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+    ; GFX900-NEXT: S_NOP 0, implicit $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+    ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+    ;
+    ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_sgpr
+    ; GFX90A: liveins: $sgpr4, $sgpr5, $vgpr0
+    ; GFX90A-NEXT: {{  $}}
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+    ; GFX90A-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX90A-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX90A-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+    ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GFX90A-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX90A-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX90A-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+    ; GFX90A-NEXT: S_NOP 0, implicit $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+    ; GFX90A-NEXT: S_NOP 0, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+    ; GFX90A-NEXT: S_NOP 0, implicit $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+    ; GFX90A-NEXT: S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+    ; GFX90A-NEXT: S_NOP 0, implicit $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+    ; GFX90A-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+  S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+  S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+  S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+  S_NOP 0, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+  S_NOP 0, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+  S_NOP 0, implicit-def $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+  S_NOP 0, implicit-def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+  S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+
+  V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+  S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+  renamable $sgpr4 = S_MOV_B32 %stack.1
+
+  S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+  S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+  S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+  S_NOP 0, implicit $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+  S_NOP 0, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+  S_NOP 0, implicit $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+  S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+  S_NOP 0, implicit $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+  S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+
+...
+
+---
+name: materialize_fi_s_mov_b32_offset_68_live_scc_no_sgpr
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: default, size: 68, alignment: 16, stack-id: default }
+  - { id: 1, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    liveins: $sgpr4, $sgpr5, $vgpr0
+
+    ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc_no_sgpr
+    ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr0
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+    ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+    ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+    ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+    ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+    ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+    ; GFX8-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX8-NEXT: $sgpr4 = S_MOV_B32 68
+    ; GFX8-NEXT: $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr0, 1, killed $sgpr4, 0, implicit $exec
+    ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GFX8-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX8-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX8-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+    ; GFX8-NEXT: S_NOP 0, implicit $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+    ; GFX8-NEXT: S_NOP 0, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+    ; GFX8-NEXT: S_NOP 0, implicit $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+    ; GFX8-NEXT: S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+    ; GFX8-NEXT: S_NOP 0, implicit $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+    ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+    ;
+    ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc_no_sgpr
+    ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr0
+    ; GFX900-NEXT: {{  $}}
+    ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+    ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+    ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+    ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+    ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+    ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+    ; GFX900-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX900-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 68, killed $vgpr0, implicit $exec
+    ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GFX900-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX900-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX900-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+    ; GFX900-NEXT: S_NOP 0, implicit $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+    ; GFX900-NEXT: S_NOP 0, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+    ; GFX900-NEXT: S_NOP 0, implicit $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+    ; GFX900-NEXT: S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+    ; GFX900-NEXT: S_NOP 0, implicit $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+    ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+    ;
+    ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc_no_sgpr
+    ; GFX90A: liveins: $sgpr4, $sgpr5, $vgpr0
+    ; GFX90A-NEXT: {{  $}}
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+    ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+    ; GFX90A-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+    ; GFX90A-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX90A-NEXT: $vgpr0 = V_ADD_U32_e32 68, killed $vgpr0, implicit $exec
+    ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    ; GFX90A-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX90A-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; GFX90A-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+    ; GFX90A-NEXT: S_NOP 0, implicit $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+    ; GFX90A-NEXT: S_NOP 0, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+    ; GFX90A-NEXT: S_NOP 0, implicit $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+    ; GFX90A-NEXT: S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+    ; GFX90A-NEXT: S_NOP 0, implicit $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+    ; GFX90A-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+  S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+  S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+  S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+  S_NOP 0, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+  S_NOP 0, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+  S_NOP 0, implicit-def $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+  S_NOP 0, implicit-def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+  S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+
+  V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+  S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+  renamable $sgpr4 = S_MOV_B32 %stack.1
+
+  S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+  S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+  S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+  S_NOP 0, implicit $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+  S_NOP 0, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+  S_NOP 0, implicit $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+  S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+  S_NOP 0, implicit $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+  S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+
+...

>From ce41a137c0d6527a4751da8413c8206e702a0d53 Mon Sep 17 00:00:00 2001
From: PankajDwivedi-25 <pankajkumar.divedi at amd.com>
Date: Wed, 24 Jul 2024 16:01:08 +0530
Subject: [PATCH 7/8] [AMDGPU] renamed test file

---
 ...e-index.mir => s_copy_frame_index_elimination_failure_pei.mir} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename llvm/test/CodeGen/AMDGPU/{reduce-frame-index.mir => s_copy_frame_index_elimination_failure_pei.mir} (100%)

diff --git a/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir b/llvm/test/CodeGen/AMDGPU/s_copy_frame_index_elimination_failure_pei.mir
similarity index 100%
rename from llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
rename to llvm/test/CodeGen/AMDGPU/s_copy_frame_index_elimination_failure_pei.mir

>From 4254f3b2d568a5606dd61f03ee0e58b4417f15b2 Mon Sep 17 00:00:00 2001
From: PankajDwivedi-25 <pankajkumar.divedi at amd.com>
Date: Thu, 25 Jul 2024 01:25:15 +0530
Subject: [PATCH 8/8] [WIP] replaced v_mad_i32_i24 with unsigned version,
 refactored nested loop

---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     | 23 +++++++++++--------
 ...re_pei.mir => s_mov_lowering_handling.mir} |  4 ++--
 2 files changed, 15 insertions(+), 12 deletions(-)
 rename llvm/test/CodeGen/AMDGPU/{s_copy_frame_index_elimination_failure_pei.mir => s_mov_lowering_handling.mir} (99%)

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 27485be56b8de..0baec28c98b97 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2557,27 +2557,30 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                   .addReg(FrameReg);
 
               MachineInstrBuilder Add;
-              if ((Add = TII->getAddNoCarry(*MBB, MI, DL, TmpResultReg, *RS)) ==
+              if ((Add = TII->getAddNoCarry(*MBB, MI, DL, TmpResultReg, *RS)) !=
                   nullptr) {
+                if (Add->getOpcode() == AMDGPU::V_ADD_CO_U32_e64) {
+                  BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::S_MOV_B32),
+                          ResultReg)
+                      .addImm(Offset);
+                  Add.addReg(ResultReg, RegState::Kill)
+                      .addReg(TmpResultReg, RegState::Kill)
+                      .addImm(0);
+                } else
+                  Add.addImm(Offset).addReg(TmpResultReg, RegState::Kill);
+              } else {
                 // VCC is live and no SGPR is free.
                 // since emergency stack slot is already used for spilling VGPR
                 // scavenged? This a way around to avoid carry, need follow-up.
                 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ResultReg)
                     .addImm(Offset);
-                Add = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MAD_I32_I24_e64),
+                Add = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MAD_U32_U24_e64),
                               TmpResultReg)
                           .addReg(TmpResultReg, RegState::Kill)
                           .addImm(1)
                           .addReg(ResultReg, RegState::Kill)
                           .addImm(0);
-              } else if (Add->getOpcode() == AMDGPU::V_ADD_CO_U32_e64) {
-                BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::S_MOV_B32), ResultReg)
-                    .addImm(Offset);
-                Add.addReg(ResultReg, RegState::Kill)
-                    .addReg(TmpResultReg, RegState::Kill)
-                    .addImm(0);
-              } else
-                Add.addImm(Offset).addReg(TmpResultReg, RegState::Kill);
+              }
 
               Register NewDest = IsCopy ? ResultReg
                                         : RS->scavengeRegisterBackwards(
diff --git a/llvm/test/CodeGen/AMDGPU/s_copy_frame_index_elimination_failure_pei.mir b/llvm/test/CodeGen/AMDGPU/s_mov_lowering_handling.mir
similarity index 99%
rename from llvm/test/CodeGen/AMDGPU/s_copy_frame_index_elimination_failure_pei.mir
rename to llvm/test/CodeGen/AMDGPU/s_mov_lowering_handling.mir
index 4f08011d7b009..d19f24da04017 100644
--- a/llvm/test/CodeGen/AMDGPU/s_copy_frame_index_elimination_failure_pei.mir
+++ b/llvm/test/CodeGen/AMDGPU/s_mov_lowering_handling.mir
@@ -370,7 +370,7 @@ body:   |
     ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
     ; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
     ; GFX8-NEXT: $sgpr4 = S_MOV_B32 64
-    ; GFX8-NEXT: $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr0, 1, killed $sgpr4, 0, implicit $exec
+    ; GFX8-NEXT: $vgpr0 = V_MAD_U32_U24_e64 killed $vgpr0, 1, killed $sgpr4, 0, implicit $exec
     ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
     ; GFX8-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
     ; GFX8-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
@@ -485,7 +485,7 @@ body:             |
     ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
     ; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
     ; GFX8-NEXT: $sgpr4 = S_MOV_B32 68
-    ; GFX8-NEXT: $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr0, 1, killed $sgpr4, 0, implicit $exec
+    ; GFX8-NEXT: $vgpr0 = V_MAD_U32_U24_e64 killed $vgpr0, 1, killed $sgpr4, 0, implicit $exec
     ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
     ; GFX8-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
     ; GFX8-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15



More information about the llvm-commits mailing list