[llvm] AMDGPU: Handle folding frame indexes into s_add_i32 (PR #101694)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 6 03:35:49 PDT 2024


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/101694

>From 527022614aced9039e1ea504520b7bcb91b232ae Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 1 Aug 2024 23:11:45 +0400
Subject: [PATCH] AMDGPU: Handle folding frame indexes into s_add_i32

---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     |  63 ++++
 .../CodeGen/AMDGPU/GlobalISel/flat-scratch.ll |  34 +-
 .../eliminate-frame-index-s-add-i32.mir       | 356 ++++++++++--------
 llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll  |  19 +-
 llvm/test/CodeGen/AMDGPU/flat-scratch.ll      |  66 +---
 llvm/test/CodeGen/AMDGPU/frame-index.mir      |   4 +-
 .../local-stack-alloc-block-sp-reference.ll   |   8 +-
 7 files changed, 293 insertions(+), 257 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index dd4e0d53202d4..43054329304f0 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2268,7 +2268,70 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       MI->eraseFromParent();
       return true;
     }
+    case AMDGPU::S_ADD_I32: {
+      // Fold the frame index in place. TODO: Handle s_or_b32, s_and_b32.
+      MachineOperand &OtherOp = MI->getOperand(FIOperandNum == 1 ? 2 : 1);
 
+      assert(FrameReg || MFI->isBottomOfStack());
+
+      MachineOperand &DstOp = MI->getOperand(0);
+      const DebugLoc &DL = MI->getDebugLoc();
+      Register MaterializedReg = FrameReg;
+
+      // SCC is operand 3; a live SCC def is unexpected but must be preserved.
+      bool DeadSCC = MI->getOperand(3).isDead();
+      // Scale the wave offset to a lane offset in place, reusing the dst reg.
+      // NOTE(review): if OtherOp is Dst, the shift below clobbers it; confirm.
+      if (FrameReg && !ST.enableFlatScratch()) {
+        BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_LSHR_B32))
+            .addDef(DstOp.getReg(), RegState::Renamable)
+            .addReg(FrameReg)
+            .addImm(ST.getWavefrontSizeLog2())
+            .setOperandDead(3); // Mark the shift's SCC def dead
+        MaterializedReg = DstOp.getReg();
+      }
+
+      // A non-immediate other operand can't be folded; add it in place first.
+      if (!OtherOp.isImm() && MaterializedReg) {
+        auto AddI32 = BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_ADD_I32))
+                          .addDef(DstOp.getReg(), RegState::Renamable)
+                          .addReg(MaterializedReg)
+                          .add(OtherOp);
+        if (DeadSCC)
+          AddI32.setOperandDead(3);
+        MaterializedReg = DstOp.getReg();
+      }
+
+      int64_t NewOffset = FrameInfo.getObjectOffset(Index);
+
+      // An immediate other operand folds into the object offset. The register
+      // case could use the default handling, but updating the result register
+      // in place avoids scavenging another register.
+      if (OtherOp.isImm())
+        NewOffset += OtherOp.getImm();
+      // NOTE(review): erasing MI assumes Dst already holds the result; confirm.
+      if (NewOffset == 0 && DeadSCC) {
+        MI->eraseFromParent();
+      } else if (!MaterializedReg && OtherOp.isImm()) {
+        // No base register (kernel): the result is just the constant offset, so
+        // rewrite to s_mov_b32. SCC should be dead here; keep the def if it is
+        // not.
+        if (DeadSCC)
+          MI->removeOperand(3);
+        else
+          MI->getOperand(3).setIsDef(true);
+
+        MI->removeOperand(2);
+        MI->getOperand(1).ChangeToImmediate(NewOffset);
+        MI->setDesc(TII->get(AMDGPU::S_MOV_B32));
+      } else {
+        if (MaterializedReg)
+          OtherOp.ChangeToRegister(MaterializedReg, false);
+        FIOp.ChangeToImmediate(NewOffset);
+      }
+
+      return true;
+    }
     default: {
       // Other access to frame index
       const DebugLoc &DL = MI->getDebugLoc();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
index f4fd803c8dda8..5ea88ee6b33ae 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
@@ -15,11 +15,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_lshl_b32 s1, s0, 2
 ; GFX9-NEXT:    s_and_b32 s0, s0, 15
-; GFX9-NEXT:    s_add_i32 s1, s1, 0
 ; GFX9-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX9-NEXT:    scratch_store_dword off, v0, s1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_add_i32 s0, s0, 0
 ; GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_endpgm
@@ -36,8 +34,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX10-NEXT:    s_and_b32 s1, s0, 15
 ; GFX10-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX10-NEXT:    s_lshl_b32 s1, s1, 2
-; GFX10-NEXT:    s_add_i32 s0, s0, 0
-; GFX10-NEXT:    s_add_i32 s1, s1, 0
 ; GFX10-NEXT:    scratch_store_dword off, v0, s0
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    scratch_load_dword v0, off, s1 glc dlc
@@ -51,11 +47,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
 ; GFX940-NEXT:    s_and_b32 s0, s0, 15
-; GFX940-NEXT:    s_add_i32 s1, s1, 0
 ; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_add_i32 s0, s0, 0
 ; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_endpgm
@@ -68,8 +62,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX11-NEXT:    s_and_b32 s1, s0, 15
 ; GFX11-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX11-NEXT:    s_lshl_b32 s1, s1, 2
-; GFX11-NEXT:    s_add_i32 s0, s0, 0
-; GFX11-NEXT:    s_add_i32 s1, s1, 0
 ; GFX11-NEXT:    scratch_store_b32 off, v0, s0 dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    scratch_load_b32 v0, off, s1 glc dlc
@@ -84,8 +76,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX12-NEXT:    s_and_b32 s1, s0, 15
 ; GFX12-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX12-NEXT:    s_lshl_b32 s1, s1, 2
-; GFX12-NEXT:    s_add_co_i32 s0, s0, 0
-; GFX12-NEXT:    s_add_co_i32 s1, s1, 0
 ; GFX12-NEXT:    scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
 ; GFX12-NEXT:    s_wait_storecnt 0x0
 ; GFX12-NEXT:    scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
@@ -1042,13 +1032,13 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX9-LABEL: store_load_large_imm_offset_foo:
 ; GFX9:       ; %bb.0: ; %bb
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v0, 13
 ; GFX9-NEXT:    s_movk_i32 s0, 0x3e80
-; GFX9-NEXT:    s_add_i32 s1, s32, 4
+; GFX9-NEXT:    v_mov_b32_e32 v0, 13
+; GFX9-NEXT:    s_add_i32 s0, s32, s0
 ; GFX9-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 15
-; GFX9-NEXT:    s_add_i32 s0, s0, s1
+; GFX9-NEXT:    s_add_i32 s0, s0, 4
 ; GFX9-NEXT:    scratch_store_dword off, v0, s0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
@@ -1059,10 +1049,10 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX10:       ; %bb.0: ; %bb
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 13
-; GFX10-NEXT:    v_mov_b32_e32 v1, 15
 ; GFX10-NEXT:    s_movk_i32 s0, 0x3e80
-; GFX10-NEXT:    s_add_i32 s1, s32, 4
-; GFX10-NEXT:    s_add_i32 s0, s0, s1
+; GFX10-NEXT:    v_mov_b32_e32 v1, 15
+; GFX10-NEXT:    s_add_i32 s0, s32, s0
+; GFX10-NEXT:    s_add_i32 s0, s0, 4
 ; GFX10-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    scratch_store_dword off, v1, s0
@@ -1074,13 +1064,13 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX940-LABEL: store_load_large_imm_offset_foo:
 ; GFX940:       ; %bb.0: ; %bb
 ; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT:    v_mov_b32_e32 v0, 13
 ; GFX940-NEXT:    s_movk_i32 s0, 0x3e80
-; GFX940-NEXT:    s_add_i32 s1, s32, 4
+; GFX940-NEXT:    v_mov_b32_e32 v0, 13
+; GFX940-NEXT:    s_add_i32 s0, s32, s0
 ; GFX940-NEXT:    scratch_store_dword off, v0, s32 offset:4 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    v_mov_b32_e32 v0, 15
-; GFX940-NEXT:    s_add_i32 s0, s0, s1
+; GFX940-NEXT:    s_add_i32 s0, s0, 4
 ; GFX940-NEXT:    scratch_store_dword off, v0, s0 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
@@ -1092,9 +1082,9 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
 ; GFX11-NEXT:    s_movk_i32 s0, 0x3e80
-; GFX11-NEXT:    s_add_i32 s1, s32, 4
-; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT:    s_add_i32 s0, s0, s1
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    s_add_i32 s0, s32, s0
+; GFX11-NEXT:    s_add_i32 s0, s0, 4
 ; GFX11-NEXT:    scratch_store_b32 off, v0, s32 offset:4 dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    scratch_store_b32 off, v1, s0 dlc
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir
index a09b39069e5c9..f4742bb7f3ebc 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir
@@ -21,21 +21,21 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; MUBUFW64-LABEL: name: s_add_i32__inline_imm__fi_offset0
-    ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
-    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 12, killed $sgpr4, implicit-def dead $scc
+    ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 12, implicit-def dead $scc
     ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; MUBUFW32-LABEL: name: s_add_i32__inline_imm__fi_offset0
-    ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
-    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 12, killed $sgpr4, implicit-def dead $scc
+    ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 12, implicit-def dead $scc
     ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; FLATSCRW64-LABEL: name: s_add_i32__inline_imm__fi_offset0
-    ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 12, $sgpr32, implicit-def dead $scc
+    ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 12, implicit-def dead $scc
     ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; FLATSCRW32-LABEL: name: s_add_i32__inline_imm__fi_offset0
-    ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 12, $sgpr32, implicit-def dead $scc
+    ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 12, implicit-def dead $scc
     ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
     renamable $sgpr7 = S_ADD_I32 12, %stack.0, implicit-def dead $scc
     SI_RETURN implicit $sgpr7
@@ -54,21 +54,21 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__inline_imm
-    ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
-    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 12, implicit-def dead $scc
+    ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 12, $sgpr7, implicit-def dead $scc
     ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__inline_imm
-    ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
-    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 12, implicit-def dead $scc
+    ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 12, $sgpr7, implicit-def dead $scc
     ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__inline_imm
-    ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 12, implicit-def dead $scc
+    ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 12, $sgpr32, implicit-def dead $scc
     ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset0__inline_imm
-    ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 12, implicit-def dead $scc
+    ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 12, $sgpr32, implicit-def dead $scc
     ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
     renamable $sgpr7 = S_ADD_I32 %stack.0, 12, implicit-def dead $scc
     SI_RETURN implicit $sgpr7
@@ -88,25 +88,21 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; MUBUFW64-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm
-    ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
-    ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 16, implicit-def $scc
-    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 12, killed $sgpr4, implicit-def $scc
+    ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 28, implicit-def $scc
     ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; MUBUFW32-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm
-    ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
-    ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 16, implicit-def $scc
-    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 12, killed $sgpr4, implicit-def $scc
+    ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 28, implicit-def $scc
     ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; FLATSCRW64-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm
-    ; FLATSCRW64: $sgpr4 = S_ADD_I32 $sgpr32, 16, implicit-def $scc
-    ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 12, killed $sgpr4, implicit-def $scc
+    ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 28, implicit-def $scc
     ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; FLATSCRW32-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm
-    ; FLATSCRW32: $sgpr4 = S_ADD_I32 $sgpr32, 16, implicit-def $scc
-    ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 12, killed $sgpr4, implicit-def $scc
+    ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 28, implicit-def $scc
     ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
     renamable $sgpr7 = S_ADD_I32 12, %stack.1, implicit-def $scc
     SI_RETURN implicit $sgpr7
@@ -125,21 +121,21 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; MUBUFW64-LABEL: name: s_add_i32__literal__fi_offset0
-    ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
-    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 68, killed $sgpr4, implicit-def dead $scc
+    ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 68, implicit-def dead $scc
     ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; MUBUFW32-LABEL: name: s_add_i32__literal__fi_offset0
-    ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
-    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 68, killed $sgpr4, implicit-def dead $scc
+    ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 68, implicit-def dead $scc
     ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; FLATSCRW64-LABEL: name: s_add_i32__literal__fi_offset0
-    ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 68, $sgpr32, implicit-def dead $scc
+    ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 68, implicit-def dead $scc
     ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; FLATSCRW32-LABEL: name: s_add_i32__literal__fi_offset0
-    ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 68, $sgpr32, implicit-def dead $scc
+    ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 68, implicit-def dead $scc
     ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
     renamable $sgpr7 = S_ADD_I32 68, %stack.0, implicit-def dead $scc
     SI_RETURN implicit $sgpr7
@@ -158,21 +154,21 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__literal
-    ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
-    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc
+    ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 68, $sgpr7, implicit-def $scc
     ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__literal
-    ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
-    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc
+    ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 68, $sgpr7, implicit-def $scc
     ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__literal
-    ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 68, implicit-def $scc
+    ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 68, $sgpr32, implicit-def $scc
     ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset0__literal
-    ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 68, implicit-def $scc
+    ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 68, $sgpr32, implicit-def $scc
     ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
     renamable $sgpr7 = S_ADD_I32 %stack.0, 68, implicit-def $scc
     SI_RETURN implicit $sgpr7
@@ -192,25 +188,21 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; MUBUFW64-LABEL: name: s_add_i32__literal__fi_offset96
-    ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
-    ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 96, implicit-def $scc
-    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 68, killed $sgpr4, implicit-def $scc
+    ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 164, implicit-def $scc
     ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; MUBUFW32-LABEL: name: s_add_i32__literal__fi_offset96
-    ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
-    ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 96, implicit-def $scc
-    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 68, killed $sgpr4, implicit-def $scc
+    ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 164, implicit-def $scc
     ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; FLATSCRW64-LABEL: name: s_add_i32__literal__fi_offset96
-    ; FLATSCRW64: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc
-    ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 68, killed $sgpr4, implicit-def $scc
+    ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 164, implicit-def $scc
     ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; FLATSCRW32-LABEL: name: s_add_i32__literal__fi_offset96
-    ; FLATSCRW32: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc
-    ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 68, killed $sgpr4, implicit-def $scc
+    ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 164, implicit-def $scc
     ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     renamable $sgpr7 = S_ADD_I32 68, %stack.1, implicit-def $scc
     SI_RETURN implicit $sgpr7, implicit $scc
@@ -230,25 +222,21 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; MUBUFW64-LABEL: name: s_add_i32____fi_offset96__literal
-    ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
-    ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 96, implicit-def $scc
-    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc
+    ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 164, $sgpr7, implicit-def $scc
     ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; MUBUFW32-LABEL: name: s_add_i32____fi_offset96__literal
-    ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
-    ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 96, implicit-def $scc
-    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc
+    ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 164, $sgpr7, implicit-def $scc
     ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; FLATSCRW64-LABEL: name: s_add_i32____fi_offset96__literal
-    ; FLATSCRW64: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc
-    ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc
+    ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 164, $sgpr32, implicit-def $scc
     ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; FLATSCRW32-LABEL: name: s_add_i32____fi_offset96__literal
-    ; FLATSCRW32: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc
-    ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc
+    ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 164, $sgpr32, implicit-def $scc
     ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     renamable $sgpr7 = S_ADD_I32 %stack.1, 68, implicit-def $scc
     SI_RETURN implicit $sgpr7, implicit $scc
@@ -270,27 +258,27 @@ body:             |
     ; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_offset0
     ; MUBUFW64: liveins: $sgpr8
     ; MUBUFW64-NEXT: {{  $}}
-    ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
-    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def dead $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
     ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_offset0
     ; MUBUFW32: liveins: $sgpr8
     ; MUBUFW32-NEXT: {{  $}}
-    ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
-    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
     ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_offset0
     ; FLATSCRW64: liveins: $sgpr8
     ; FLATSCRW64-NEXT: {{  $}}
-    ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, $sgpr32, implicit-def dead $scc
+    ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc
     ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_offset0
     ; FLATSCRW32: liveins: $sgpr8
     ; FLATSCRW32-NEXT: {{  $}}
-    ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, $sgpr32, implicit-def dead $scc
+    ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc
     ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
     renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.0, implicit-def dead $scc
     SI_RETURN implicit $sgpr7
@@ -312,15 +300,15 @@ body:             |
     ; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__sgpr
     ; MUBUFW64: liveins: $sgpr8
     ; MUBUFW64-NEXT: {{  $}}
-    ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
-    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
     ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__sgpr
     ; MUBUFW32: liveins: $sgpr8
     ; MUBUFW32-NEXT: {{  $}}
-    ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
-    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
     ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__sgpr
@@ -355,31 +343,31 @@ body:             |
     ; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset
     ; MUBUFW64: liveins: $sgpr8
     ; MUBUFW64-NEXT: {{  $}}
-    ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
-    ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 80, implicit-def $scc
-    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def dead $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 80, implicit-def dead $scc
     ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset
     ; MUBUFW32: liveins: $sgpr8
     ; MUBUFW32-NEXT: {{  $}}
-    ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
-    ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 80, implicit-def $scc
-    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 80, implicit-def dead $scc
     ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset
     ; FLATSCRW64: liveins: $sgpr8
     ; FLATSCRW64-NEXT: {{  $}}
-    ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 80, implicit-def $scc
-    ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def dead $scc
+    ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc
+    ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 80, implicit-def dead $scc
     ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
     ;
     ; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset
     ; FLATSCRW32: liveins: $sgpr8
     ; FLATSCRW32-NEXT: {{  $}}
-    ; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 80, implicit-def $scc
-    ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def dead $scc
+    ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc
+    ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 80, implicit-def dead $scc
     ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
     renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.1, implicit-def dead $scc
     SI_RETURN implicit $sgpr7
@@ -402,71 +390,117 @@ body:             |
     ; MUBUFW64-LABEL: name: s_add_i32__fi_literal_offset__sgpr
     ; MUBUFW64: liveins: $sgpr8
     ; MUBUFW64-NEXT: {{  $}}
-    ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
-    ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 80, implicit-def $scc
-    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 80, $sgpr7, implicit-def $scc
     ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; MUBUFW32-LABEL: name: s_add_i32__fi_literal_offset__sgpr
     ; MUBUFW32: liveins: $sgpr8
     ; MUBUFW32-NEXT: {{  $}}
-    ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
-    ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 80, implicit-def $scc
-    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 80, $sgpr7, implicit-def $scc
     ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; FLATSCRW64-LABEL: name: s_add_i32__fi_literal_offset__sgpr
     ; FLATSCRW64: liveins: $sgpr8
     ; FLATSCRW64-NEXT: {{  $}}
-    ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 80, implicit-def $scc
-    ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc
+    ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def $scc
+    ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 80, $sgpr7, implicit-def $scc
     ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; FLATSCRW32-LABEL: name: s_add_i32__fi_literal_offset__sgpr
     ; FLATSCRW32: liveins: $sgpr8
     ; FLATSCRW32-NEXT: {{  $}}
-    ; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 80, implicit-def $scc
-    ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc
+    ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def $scc
+    ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 80, $sgpr7, implicit-def $scc
     ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     renamable $sgpr7 = S_ADD_I32 %stack.1, $sgpr8, implicit-def $scc
     SI_RETURN implicit $sgpr7, implicit $scc
 
 ...
-# FIXME: Fail verifier
-# ---
-# name: s_add_i32__kernel__literal__fi_offset96__offset_literal
-# tracksRegLiveness: true
-# stack:
-#   - { id: 0, size: 96, alignment: 16 }
-#   - { id: 1, size: 128, alignment: 4 }
-# machineFunctionInfo:
-#   scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
-#   frameOffsetReg:  '$sgpr33'
-#   stackPtrOffsetReg: '$sgpr32'
-#   isEntryFunction: true
-# body:             |
-#   bb.0:
-#     renamable $sgpr7 = S_ADD_I32 68, %stack.1, implicit-def dead $scc
-#     SI_RETURN implicit $sgpr7
-# ...
-
-# ---
-# name: s_add_i32__kernel__fi_offset96__offset_literal__literal
-# tracksRegLiveness: true
-# stack:
-#   - { id: 0, size: 96, alignment: 16 }
-#   - { id: 1, size: 128, alignment: 4 }
-# machineFunctionInfo:
-#   scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
-#   frameOffsetReg:  '$sgpr33'
-#   stackPtrOffsetReg: '$sgpr32'
-#   isEntryFunction: true
-# body:             |
-#   bb.0:
-#     renamable $sgpr7 = S_ADD_I32 %stack.1, 68, implicit-def $scc
-#     SI_RETURN implicit $sgpr7, implicit $scc
-
-# ...
+
+---
+name: s_add_i32__kernel__literal__fi_offset96__offset_literal
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 96, alignment: 16 }
+  - { id: 1, size: 128, alignment: 4 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+  isEntryFunction: true
+body:             |
+  bb.0:
+    ; MUBUFW64-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal
+    ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; MUBUFW64-NEXT: {{  $}}
+    ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+    ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 164
+    ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
+    ;
+    ; MUBUFW32-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal
+    ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; MUBUFW32-NEXT: {{  $}}
+    ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+    ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 164
+    ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
+    ;
+    ; FLATSCRW64-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal
+    ; FLATSCRW64: renamable $sgpr7 = S_MOV_B32 164
+    ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
+    ;
+    ; FLATSCRW32-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal
+    ; FLATSCRW32: renamable $sgpr7 = S_MOV_B32 164
+    ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
+    renamable $sgpr7 = S_ADD_I32 68, %stack.1, implicit-def dead $scc
+    SI_RETURN implicit $sgpr7
+...
+
+---
+name: s_add_i32__kernel__fi_offset96__offset_literal__literal
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 96, alignment: 16 }
+  - { id: 1, size: 128, alignment: 4 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+  isEntryFunction: true
+body:             |
+  bb.0:
+    ; MUBUFW64-LABEL: name: s_add_i32__kernel__fi_offset96__offset_literal__literal
+    ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; MUBUFW64-NEXT: {{  $}}
+    ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+    ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 164, implicit-def $scc
+    ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
+    ;
+    ; MUBUFW32-LABEL: name: s_add_i32__kernel__fi_offset96__offset_literal__literal
+    ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; MUBUFW32-NEXT: {{  $}}
+    ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+    ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 164, implicit-def $scc
+    ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
+    ;
+    ; FLATSCRW64-LABEL: name: s_add_i32__kernel__fi_offset96__offset_literal__literal
+    ; FLATSCRW64: renamable $sgpr7 = S_MOV_B32 164, implicit-def $scc
+    ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
+    ;
+    ; FLATSCRW32-LABEL: name: s_add_i32__kernel__fi_offset96__offset_literal__literal
+    ; FLATSCRW32: renamable $sgpr7 = S_MOV_B32 164, implicit-def $scc
+    ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
+    renamable $sgpr7 = S_ADD_I32 %stack.1, 68, implicit-def $scc
+    SI_RETURN implicit $sgpr7, implicit $scc
+
+...
 
 ---
 name: s_add_i32__kernel__sgpr__fi_literal_offset
@@ -620,27 +654,31 @@ body:             |
     ; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc
     ; MUBUFW64: liveins: $sgpr8
     ; MUBUFW64-NEXT: {{  $}}
-    ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
-    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 0, implicit-def $scc
     ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc
     ; MUBUFW32: liveins: $sgpr8
     ; MUBUFW32-NEXT: {{  $}}
-    ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
-    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 0, implicit-def $scc
     ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc
     ; FLATSCRW64: liveins: $sgpr8
     ; FLATSCRW64-NEXT: {{  $}}
-    ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, $sgpr32, implicit-def $scc
+    ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def $scc
+    ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 0, implicit-def $scc
     ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc
     ; FLATSCRW32: liveins: $sgpr8
     ; FLATSCRW32-NEXT: {{  $}}
-    ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, $sgpr32, implicit-def $scc
+    ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def $scc
+    ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 0, implicit-def $scc
     ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.0, implicit-def $scc
     SI_RETURN implicit $sgpr7, implicit $scc
@@ -709,31 +747,31 @@ body:             |
     ; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc
     ; MUBUFW64: liveins: $sgpr8
     ; MUBUFW64-NEXT: {{  $}}
-    ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
-    ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 96, implicit-def $scc
-    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 96, implicit-def $scc
     ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc
     ; MUBUFW32: liveins: $sgpr8
     ; MUBUFW32-NEXT: {{  $}}
-    ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
-    ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 96, implicit-def $scc
-    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 96, implicit-def $scc
     ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc
     ; FLATSCRW64: liveins: $sgpr8
     ; FLATSCRW64-NEXT: {{  $}}
-    ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc
-    ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def $scc
+    ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def $scc
+    ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 96, implicit-def $scc
     ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc
     ; FLATSCRW32: liveins: $sgpr8
     ; FLATSCRW32-NEXT: {{  $}}
-    ; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc
-    ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def $scc
+    ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def $scc
+    ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 96, implicit-def $scc
     ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.1, implicit-def $scc
     SI_RETURN implicit $sgpr7, implicit $scc
@@ -754,25 +792,21 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; MUBUFW64-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm
-    ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
-    ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 32, implicit-def $scc
-    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 8, killed $sgpr4, implicit-def $scc
+    ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 40, implicit-def $scc
     ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; MUBUFW32-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm
-    ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
-    ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 32, implicit-def $scc
-    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 8, killed $sgpr4, implicit-def $scc
+    ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 40, implicit-def $scc
     ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; FLATSCRW64-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm
-    ; FLATSCRW64: $sgpr4 = S_ADD_I32 $sgpr32, 32, implicit-def $scc
-    ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 8, killed $sgpr4, implicit-def $scc
+    ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 40, implicit-def $scc
     ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; FLATSCRW32-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm
-    ; FLATSCRW32: $sgpr4 = S_ADD_I32 $sgpr32, 32, implicit-def $scc
-    ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 8, killed $sgpr4, implicit-def $scc
+    ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 40, implicit-def $scc
     ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     renamable $sgpr7 = S_ADD_I32 8, %stack.1, implicit-def $scc
     SI_RETURN implicit $sgpr7, implicit $scc
@@ -792,25 +826,21 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; MUBUFW64-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm
-    ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
-    ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 32, implicit-def $scc
-    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 8, implicit-def $scc
+    ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 40, $sgpr7, implicit-def $scc
     ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; MUBUFW32-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm
-    ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
-    ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 32, implicit-def $scc
-    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 8, implicit-def $scc
+    ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 40, $sgpr7, implicit-def $scc
     ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm
-    ; FLATSCRW64: $sgpr4 = S_ADD_I32 $sgpr32, 32, implicit-def $scc
-    ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 8, implicit-def $scc
+    ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 40, $sgpr32, implicit-def $scc
     ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm
-    ; FLATSCRW32: $sgpr4 = S_ADD_I32 $sgpr32, 32, implicit-def $scc
-    ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 8, implicit-def $scc
+    ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 40, $sgpr32, implicit-def $scc
     ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     renamable $sgpr7 = S_ADD_I32 %stack.1, 8, implicit-def $scc
     SI_RETURN implicit $sgpr7, implicit $scc
@@ -835,7 +865,7 @@ body:             |
     ; MUBUFW64-NEXT: {{  $}}
     ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
     ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
-    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 8, 32, implicit-def $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 40, implicit-def $scc
     ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; MUBUFW32-LABEL: name: s_add_i32__kernel_inlineimm__fi_offset_32__total_offset_inlineimm
@@ -843,15 +873,15 @@ body:             |
     ; MUBUFW32-NEXT: {{  $}}
     ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
     ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
-    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 8, 32, implicit-def $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 40, implicit-def $scc
     ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; FLATSCRW64-LABEL: name: s_add_i32__kernel_inlineimm__fi_offset_32__total_offset_inlineimm
-    ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 8, 32, implicit-def $scc
+    ; FLATSCRW64: renamable $sgpr7 = S_MOV_B32 40, implicit-def $scc
     ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; FLATSCRW32-LABEL: name: s_add_i32__kernel_inlineimm__fi_offset_32__total_offset_inlineimm
-    ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 8, 32, implicit-def $scc
+    ; FLATSCRW32: renamable $sgpr7 = S_MOV_B32 40, implicit-def $scc
     ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     renamable $sgpr7 = S_ADD_I32 8, %stack.1, implicit-def $scc
     SI_RETURN implicit $sgpr7, implicit $scc
@@ -876,7 +906,7 @@ body:             |
     ; MUBUFW64-NEXT: {{  $}}
     ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
     ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
-    ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 32, 8, implicit-def $scc
+    ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 40, implicit-def $scc
     ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; MUBUFW32-LABEL: name: s_add_i32__kernel_fi_offset_32__inlineimm__total_offset_inlineimm
@@ -884,15 +914,15 @@ body:             |
     ; MUBUFW32-NEXT: {{  $}}
     ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
     ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
-    ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 32, 8, implicit-def $scc
+    ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 40, implicit-def $scc
     ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; FLATSCRW64-LABEL: name: s_add_i32__kernel_fi_offset_32__inlineimm__total_offset_inlineimm
-    ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 32, 8, implicit-def $scc
+    ; FLATSCRW64: renamable $sgpr7 = S_MOV_B32 40, implicit-def $scc
     ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     ;
     ; FLATSCRW32-LABEL: name: s_add_i32__kernel_fi_offset_32__inlineimm__total_offset_inlineimm
-    ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 32, 8, implicit-def $scc
+    ; FLATSCRW32: renamable $sgpr7 = S_MOV_B32 40, implicit-def $scc
     ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
     renamable $sgpr7 = S_ADD_I32 %stack.1, 8, implicit-def $scc
     SI_RETURN implicit $sgpr7, implicit $scc
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
index 89da9b8e75bc9..9d9d5b239a12c 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
@@ -101,7 +101,6 @@ define amdgpu_kernel void @soff1_voff1(i32 %soff) {
 ; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
 ; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
 ; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
-; GFX12-SDAG-NEXT:    s_add_co_i32 s0, s0, 0
 ; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
 ; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
 ; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -237,7 +236,6 @@ define amdgpu_kernel void @soff1_voff2(i32 %soff) {
 ; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX12-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
 ; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
-; GFX12-SDAG-NEXT:    s_add_co_i32 s0, s0, 0
 ; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
 ; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
 ; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -375,7 +373,6 @@ define amdgpu_kernel void @soff1_voff4(i32 %soff) {
 ; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX12-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
-; GFX12-SDAG-NEXT:    s_add_co_i32 s0, s0, 0
 ; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
 ; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
 ; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -514,8 +511,6 @@ define amdgpu_kernel void @soff2_voff1(i32 %soff) {
 ; GFX12-SDAG-NEXT:    v_dual_mov_b32 v3, 4 :: v_dual_and_b32 v0, 0x3ff, v0
 ; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-SDAG-NEXT:    s_add_co_i32 s0, s0, 0
 ; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
 ; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
 ; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -652,11 +647,10 @@ define amdgpu_kernel void @soff2_voff2(i32 %soff) {
 ; GFX12-SDAG-NEXT:    s_load_b32 s0, s[2:3], 0x24
 ; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
 ; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
-; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX12-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
 ; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX12-SDAG-NEXT:    s_add_co_i32 s0, s0, 0
 ; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
 ; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
 ; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -795,11 +789,10 @@ define amdgpu_kernel void @soff2_voff4(i32 %soff) {
 ; GFX12-SDAG-NEXT:    s_load_b32 s0, s[2:3], 0x24
 ; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
 ; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
-; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX12-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
-; GFX12-SDAG-NEXT:    s_add_co_i32 s0, s0, 0
 ; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
 ; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
 ; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -939,8 +932,6 @@ define amdgpu_kernel void @soff4_voff1(i32 %soff) {
 ; GFX12-SDAG-NEXT:    v_dual_mov_b32 v3, 4 :: v_dual_and_b32 v0, 0x3ff, v0
 ; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-SDAG-NEXT:    s_add_co_i32 s0, s0, 0
 ; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
 ; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
 ; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -1077,11 +1068,10 @@ define amdgpu_kernel void @soff4_voff2(i32 %soff) {
 ; GFX12-SDAG-NEXT:    s_load_b32 s0, s[2:3], 0x24
 ; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
 ; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
-; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX12-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
 ; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX12-SDAG-NEXT:    s_add_co_i32 s0, s0, 0
 ; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
 ; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
 ; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -1219,11 +1209,10 @@ define amdgpu_kernel void @soff4_voff4(i32 %soff) {
 ; GFX12-SDAG-NEXT:    s_load_b32 s0, s[2:3], 0x24
 ; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
 ; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
-; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX12-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX12-SDAG-NEXT:    s_add_co_i32 s0, s0, 0
 ; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
 ; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
 ; GFX12-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index 14d8b71c5167a..9ddaa52234daa 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -381,11 +381,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_lshl_b32 s1, s0, 2
 ; GFX9-NEXT:    s_and_b32 s0, s0, 15
-; GFX9-NEXT:    s_add_i32 s1, s1, 0
 ; GFX9-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX9-NEXT:    scratch_store_dword off, v0, s1
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_add_i32 s0, s0, 0
 ; GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_endpgm
@@ -402,8 +400,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX10-NEXT:    s_and_b32 s1, s0, 15
 ; GFX10-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX10-NEXT:    s_lshl_b32 s1, s1, 2
-; GFX10-NEXT:    s_add_i32 s0, s0, 0
-; GFX10-NEXT:    s_add_i32 s1, s1, 0
 ; GFX10-NEXT:    scratch_store_dword off, v0, s0
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    scratch_load_dword v0, off, s1 glc dlc
@@ -418,8 +414,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX11-NEXT:    s_and_b32 s1, s0, 15
 ; GFX11-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX11-NEXT:    s_lshl_b32 s1, s1, 2
-; GFX11-NEXT:    s_add_i32 s0, s0, 0
-; GFX11-NEXT:    s_add_i32 s1, s1, 0
 ; GFX11-NEXT:    scratch_store_b32 off, v0, s0 dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    scratch_load_b32 v0, off, s1 glc dlc
@@ -434,8 +428,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX12-NEXT:    s_and_b32 s1, s0, 15
 ; GFX12-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX12-NEXT:    s_lshl_b32 s1, s1, 2
-; GFX12-NEXT:    s_add_co_i32 s0, s0, 0
-; GFX12-NEXT:    s_add_co_i32 s1, s1, 0
 ; GFX12-NEXT:    scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
 ; GFX12-NEXT:    s_wait_storecnt 0x0
 ; GFX12-NEXT:    scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
@@ -455,11 +447,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
 ; GFX9-PAL-NEXT:    s_lshl_b32 s1, s0, 2
 ; GFX9-PAL-NEXT:    s_and_b32 s0, s0, 15
-; GFX9-PAL-NEXT:    s_add_i32 s1, s1, 0
 ; GFX9-PAL-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX9-PAL-NEXT:    scratch_store_dword off, v0, s1
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-PAL-NEXT:    s_add_i32 s0, s0, 0
 ; GFX9-PAL-NEXT:    scratch_load_dword v0, off, s0 glc
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
@@ -471,11 +461,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
 ; GFX940-NEXT:    s_and_b32 s0, s0, 15
-; GFX940-NEXT:    s_add_i32 s1, s1, 0
 ; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_add_i32 s0, s0, 0
 ; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_endpgm
@@ -497,8 +485,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX10-PAL-NEXT:    s_and_b32 s1, s0, 15
 ; GFX10-PAL-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX10-PAL-NEXT:    s_lshl_b32 s1, s1, 2
-; GFX10-PAL-NEXT:    s_add_i32 s0, s0, 0
-; GFX10-PAL-NEXT:    s_add_i32 s1, s1, 0
 ; GFX10-PAL-NEXT:    scratch_store_dword off, v0, s0
 ; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-PAL-NEXT:    scratch_load_dword v0, off, s1 glc dlc
@@ -513,8 +499,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX11-PAL-NEXT:    s_and_b32 s1, s0, 15
 ; GFX11-PAL-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX11-PAL-NEXT:    s_lshl_b32 s1, s1, 2
-; GFX11-PAL-NEXT:    s_add_i32 s0, s0, 0
-; GFX11-PAL-NEXT:    s_add_i32 s1, s1, 0
 ; GFX11-PAL-NEXT:    scratch_store_b32 off, v0, s0 dlc
 ; GFX11-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-PAL-NEXT:    scratch_load_b32 v0, off, s1 glc dlc
@@ -529,8 +513,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX12-PAL-NEXT:    s_and_b32 s1, s0, 15
 ; GFX12-PAL-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX12-PAL-NEXT:    s_lshl_b32 s1, s1, 2
-; GFX12-PAL-NEXT:    s_add_co_i32 s0, s0, 0
-; GFX12-PAL-NEXT:    s_add_co_i32 s1, s1, 0
 ; GFX12-PAL-NEXT:    scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
 ; GFX12-PAL-NEXT:    s_wait_storecnt 0x0
 ; GFX12-PAL-NEXT:    scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
@@ -552,13 +534,11 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
 ; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
 ; GFX9-NEXT:    s_lshl_b32 s0, s2, 2
-; GFX9-NEXT:    s_add_i32 s0, s0, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 15
 ; GFX9-NEXT:    scratch_store_dword off, v0, s0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_and_b32 s0, s2, 15
 ; GFX9-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX9-NEXT:    s_add_i32 s0, s0, 0
 ; GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_endpgm
@@ -573,8 +553,6 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
 ; GFX10-NEXT:    s_and_b32 s0, s2, 15
 ; GFX10-NEXT:    s_lshl_b32 s1, s2, 2
 ; GFX10-NEXT:    s_lshl_b32 s0, s0, 2
-; GFX10-NEXT:    s_add_i32 s1, s1, 0
-; GFX10-NEXT:    s_add_i32 s0, s0, 0
 ; GFX10-NEXT:    scratch_store_dword off, v0, s1
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    scratch_load_dword v0, off, s0 glc dlc
@@ -587,8 +565,6 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
 ; GFX11-NEXT:    s_and_b32 s1, s0, 15
 ; GFX11-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX11-NEXT:    s_lshl_b32 s1, s1, 2
-; GFX11-NEXT:    s_add_i32 s0, s0, 0
-; GFX11-NEXT:    s_add_i32 s1, s1, 0
 ; GFX11-NEXT:    scratch_store_b32 off, v0, s0 dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    scratch_load_b32 v0, off, s1 glc dlc
@@ -601,8 +577,6 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
 ; GFX12-NEXT:    s_and_b32 s1, s0, 15
 ; GFX12-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX12-NEXT:    s_lshl_b32 s1, s1, 2
-; GFX12-NEXT:    s_add_co_i32 s0, s0, 0
-; GFX12-NEXT:    s_add_co_i32 s1, s1, 0
 ; GFX12-NEXT:    scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
 ; GFX12-NEXT:    s_wait_storecnt 0x0
 ; GFX12-NEXT:    scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
@@ -621,11 +595,9 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
 ; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
 ; GFX9-PAL-NEXT:    s_lshl_b32 s1, s0, 2
 ; GFX9-PAL-NEXT:    s_and_b32 s0, s0, 15
-; GFX9-PAL-NEXT:    s_add_i32 s1, s1, 0
-; GFX9-PAL-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX9-PAL-NEXT:    scratch_store_dword off, v0, s1
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-PAL-NEXT:    s_add_i32 s0, s0, 0
+; GFX9-PAL-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX9-PAL-NEXT:    scratch_load_dword v0, off, s0 glc
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-PAL-NEXT:    s_endpgm
@@ -633,13 +605,11 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
 ; GFX940-LABEL: store_load_sindex_foo:
 ; GFX940:       ; %bb.0: ; %bb
 ; GFX940-NEXT:    s_lshl_b32 s1, s0, 2
-; GFX940-NEXT:    s_and_b32 s0, s0, 15
-; GFX940-NEXT:    s_add_i32 s1, s1, 0
 ; GFX940-NEXT:    v_mov_b32_e32 v0, 15
-; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
+; GFX940-NEXT:    s_and_b32 s0, s0, 15
 ; GFX940-NEXT:    scratch_store_dword off, v0, s1 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
-; GFX940-NEXT:    s_add_i32 s0, s0, 0
+; GFX940-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX940-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
 ; GFX940-NEXT:    s_waitcnt vmcnt(0)
 ; GFX940-NEXT:    s_endpgm
@@ -659,8 +629,6 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
 ; GFX10-PAL-NEXT:    s_and_b32 s1, s0, 15
 ; GFX10-PAL-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX10-PAL-NEXT:    s_lshl_b32 s1, s1, 2
-; GFX10-PAL-NEXT:    s_add_i32 s0, s0, 0
-; GFX10-PAL-NEXT:    s_add_i32 s1, s1, 0
 ; GFX10-PAL-NEXT:    scratch_store_dword off, v0, s0
 ; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-PAL-NEXT:    scratch_load_dword v0, off, s1 glc dlc
@@ -673,8 +641,6 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
 ; GFX11-PAL-NEXT:    s_and_b32 s1, s0, 15
 ; GFX11-PAL-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX11-PAL-NEXT:    s_lshl_b32 s1, s1, 2
-; GFX11-PAL-NEXT:    s_add_i32 s0, s0, 0
-; GFX11-PAL-NEXT:    s_add_i32 s1, s1, 0
 ; GFX11-PAL-NEXT:    scratch_store_b32 off, v0, s0 dlc
 ; GFX11-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-PAL-NEXT:    scratch_load_b32 v0, off, s1 glc dlc
@@ -687,8 +653,6 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
 ; GFX12-PAL-NEXT:    s_and_b32 s1, s0, 15
 ; GFX12-PAL-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX12-PAL-NEXT:    s_lshl_b32 s1, s1, 2
-; GFX12-PAL-NEXT:    s_add_co_i32 s0, s0, 0
-; GFX12-PAL-NEXT:    s_add_co_i32 s1, s1, 0
 ; GFX12-PAL-NEXT:    scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
 ; GFX12-PAL-NEXT:    s_wait_storecnt 0x0
 ; GFX12-PAL-NEXT:    scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
@@ -3693,12 +3657,12 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX9-LABEL: store_load_large_imm_offset_foo:
 ; GFX9:       ; %bb.0: ; %bb
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v0, 13
 ; GFX9-NEXT:    s_movk_i32 s0, 0x3000
-; GFX9-NEXT:    s_add_i32 s1, s32, 4
+; GFX9-NEXT:    v_mov_b32_e32 v0, 13
+; GFX9-NEXT:    s_add_i32 s0, s32, s0
 ; GFX9-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_add_i32 s0, s0, s1
+; GFX9-NEXT:    s_add_i32 s0, s0, 4
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 15
 ; GFX9-NEXT:    scratch_store_dword off, v0, s0 offset:3712
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
@@ -3710,10 +3674,10 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX10:       ; %bb.0: ; %bb
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 13
-; GFX10-NEXT:    v_mov_b32_e32 v1, 15
 ; GFX10-NEXT:    s_movk_i32 s0, 0x3800
-; GFX10-NEXT:    s_add_i32 s1, s32, 4
-; GFX10-NEXT:    s_add_i32 s0, s0, s1
+; GFX10-NEXT:    v_mov_b32_e32 v1, 15
+; GFX10-NEXT:    s_add_i32 s0, s32, s0
+; GFX10-NEXT:    s_add_i32 s0, s0, 4
 ; GFX10-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    scratch_store_dword off, v1, s0 offset:1664
@@ -3755,12 +3719,12 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX9-PAL-LABEL: store_load_large_imm_offset_foo:
 ; GFX9-PAL:       ; %bb.0: ; %bb
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-PAL-NEXT:    v_mov_b32_e32 v0, 13
 ; GFX9-PAL-NEXT:    s_movk_i32 s0, 0x3000
-; GFX9-PAL-NEXT:    s_add_i32 s1, s32, 4
+; GFX9-PAL-NEXT:    v_mov_b32_e32 v0, 13
+; GFX9-PAL-NEXT:    s_add_i32 s0, s32, s0
 ; GFX9-PAL-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-PAL-NEXT:    s_add_i32 s0, s0, s1
+; GFX9-PAL-NEXT:    s_add_i32 s0, s0, 4
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v0, 15
 ; GFX9-PAL-NEXT:    scratch_store_dword off, v0, s0 offset:3712
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
@@ -3786,10 +3750,10 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX10-PAL:       ; %bb.0: ; %bb
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-PAL-NEXT:    v_mov_b32_e32 v0, 13
-; GFX10-PAL-NEXT:    v_mov_b32_e32 v1, 15
 ; GFX10-PAL-NEXT:    s_movk_i32 s0, 0x3800
-; GFX10-PAL-NEXT:    s_add_i32 s1, s32, 4
-; GFX10-PAL-NEXT:    s_add_i32 s0, s0, s1
+; GFX10-PAL-NEXT:    v_mov_b32_e32 v1, 15
+; GFX10-PAL-NEXT:    s_add_i32 s0, s32, s0
+; GFX10-PAL-NEXT:    s_add_i32 s0, s0, 4
 ; GFX10-PAL-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-PAL-NEXT:    scratch_store_dword off, v1, s0 offset:1664
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir
index 34c7614ae36f9..132f018548bd7 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir
@@ -55,8 +55,8 @@ body:             |
     ; GCN-LABEL: name: func_add_constant_to_fi_uniform_i32
     ; GCN: liveins: $sgpr30_sgpr31
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: $sgpr0 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
-    ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_I32 killed $sgpr0, 4, implicit-def dead $scc
+    ; GCN-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+    ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_I32 4, $sgpr4, implicit-def dead $scc
     ; GCN-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr4, implicit $exec
     ; GCN-NEXT: $m0 = S_MOV_B32 -1
     ; GCN-NEXT: DS_WRITE_B32 undef renamable $vgpr0, killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index 49531e3b4f8f3..330fcfd45dd69 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -161,8 +161,8 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) {
 ; FLATSCR-NEXT:    s_mov_b32 s0, 0
 ; FLATSCR-NEXT:  .LBB1_1: ; %loadstoreloop
 ; FLATSCR-NEXT:    ; =>This Inner Loop Header: Depth=1
-; FLATSCR-NEXT:    s_add_i32 s3, s33, 0x3000
-; FLATSCR-NEXT:    s_add_i32 s1, s0, s3
+; FLATSCR-NEXT:    s_add_i32 s1, s33, s0
+; FLATSCR-NEXT:    s_addk_i32 s1, 0x3000
 ; FLATSCR-NEXT:    s_add_i32 s0, s0, 1
 ; FLATSCR-NEXT:    s_cmpk_lt_u32 s0, 0x2120
 ; FLATSCR-NEXT:    scratch_store_byte off, v2, s1
@@ -170,8 +170,8 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) {
 ; FLATSCR-NEXT:    s_cbranch_scc1 .LBB1_1
 ; FLATSCR-NEXT:  ; %bb.2: ; %split
 ; FLATSCR-NEXT:    s_movk_i32 s0, 0x2000
-; FLATSCR-NEXT:    s_add_i32 s1, s33, 0x3000
-; FLATSCR-NEXT:    s_add_i32 s0, s0, s1
+; FLATSCR-NEXT:    s_add_i32 s0, s33, s0
+; FLATSCR-NEXT:    s_addk_i32 s0, 0x3000
 ; FLATSCR-NEXT:    scratch_load_dwordx2 v[2:3], off, s0 offset:208 glc
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    s_add_i32 s0, s33, 0x3000



More information about the llvm-commits mailing list