[llvm] AMDGPU: Handle folding frame indexes into s_add_i32 (PR #101694)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 7 10:13:41 PDT 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/101694
>From 18de2ef43b08312bd3c27e92ed2f5e62d2c11ac3 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 1 Aug 2024 23:11:45 +0400
Subject: [PATCH 1/4] AMDGPU: Handle folding frame indexes into s_add_i32
---
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 63 ++++
.../CodeGen/AMDGPU/GlobalISel/flat-scratch.ll | 34 +-
.../eliminate-frame-index-s-add-i32.mir | 356 ++++++++++--------
llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll | 19 +-
llvm/test/CodeGen/AMDGPU/flat-scratch.ll | 66 +---
llvm/test/CodeGen/AMDGPU/frame-index.mir | 4 +-
.../local-stack-alloc-block-sp-reference.ll | 8 +-
7 files changed, 293 insertions(+), 257 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index dd4e0d53202d4e..43054329304f06 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2268,7 +2268,70 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
MI->eraseFromParent();
return true;
}
+ case AMDGPU::S_ADD_I32: {
+   // Fold the frame index into this add in place, reusing the result register
+   // instead of scavenging one. TODO: Handle s_or_b32, s_and_b32.
+   MachineOperand &OtherOp = MI->getOperand(FIOperandNum == 1 ? 2 : 1);
+   assert(FrameReg || MFI->isBottomOfStack());
+   MachineOperand &DstOp = MI->getOperand(0);
+   const DebugLoc &DL = MI->getDebugLoc();
+   Register MaterializedReg = FrameReg;
+   // Defend against live scc, which should never happen in practice.
+   bool DeadSCC = MI->getOperand(3).isDead();
+
+   // Do an in-place scale of the wave offset to the lane offset.
+   // NOTE(review): clobbers OtherOp when it is the same register as DstOp.
+   if (FrameReg && !ST.enableFlatScratch()) {
+     BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_LSHR_B32))
+         .addDef(DstOp.getReg(), RegState::Renamable)
+         .addReg(FrameReg)
+         .addImm(ST.getWavefrontSizeLog2())
+         .setOperandDead(3); // Set SCC dead
+     MaterializedReg = DstOp.getReg();
+   }
+
+   // If we can't fold the other operand, do another increment.
+   if (!OtherOp.isImm() && MaterializedReg) {
+     auto AddI32 = BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_ADD_I32))
+                       .addDef(DstOp.getReg(), RegState::Renamable)
+                       .addReg(MaterializedReg)
+                       .add(OtherOp);
+     if (DeadSCC)
+       AddI32.setOperandDead(3);
+     MaterializedReg = DstOp.getReg();
+   }
+
+   // For the non-immediate case, we could fall through to the default
+   // handling, but we do an in-place update of the result register here to
+   // avoid scavenging another register.
+   int64_t NewOffset = FrameInfo.getObjectOffset(Index);
+   if (OtherOp.isImm())
+     NewOffset += OtherOp.getImm();
+
+   // Erasing the add is only valid when the result register already holds the
+   // value, i.e. the one register the rewritten add would read is DstOp itself.
+   Register SrcReg = MaterializedReg || !OtherOp.isReg() ? MaterializedReg
+                                                          : OtherOp.getReg();
+   if (NewOffset == 0 && DeadSCC && SrcReg == DstOp.getReg()) {
+     MI->eraseFromParent();
+   } else if (!MaterializedReg && OtherOp.isImm()) {
+     // In a kernel, the address should just be an immediate. SCC should
+     // really be dead, but preserve the def just in case it isn't.
+     if (DeadSCC)
+       MI->removeOperand(3);
+     else
+       MI->getOperand(3).setIsDef(true);
+     MI->removeOperand(2);
+     MI->getOperand(1).ChangeToImmediate(NewOffset);
+     MI->setDesc(TII->get(AMDGPU::S_MOV_B32));
+   } else {
+     if (MaterializedReg)
+       OtherOp.ChangeToRegister(MaterializedReg, false);
+     FIOp.ChangeToImmediate(NewOffset);
+   }
+   return true;
+ }
default: {
// Other access to frame index
const DebugLoc &DL = MI->getDebugLoc();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
index f4fd803c8dda89..5ea88ee6b33aef 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
@@ -15,11 +15,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_lshl_b32 s1, s0, 2
; GFX9-NEXT: s_and_b32 s0, s0, 15
-; GFX9-NEXT: s_add_i32 s1, s1, 0
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-NEXT: scratch_store_dword off, v0, s1
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_add_i32 s0, s0, 0
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
@@ -36,8 +34,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX10-NEXT: s_and_b32 s1, s0, 15
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
-; GFX10-NEXT: s_add_i32 s0, s0, 0
-; GFX10-NEXT: s_add_i32 s1, s1, 0
; GFX10-NEXT: scratch_store_dword off, v0, s0
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -51,11 +47,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: s_lshl_b32 s1, s0, 2
; GFX940-NEXT: s_and_b32 s0, s0, 15
-; GFX940-NEXT: s_add_i32 s1, s1, 0
; GFX940-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: s_add_i32 s0, s0, 0
; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_endpgm
@@ -68,8 +62,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX11-NEXT: s_and_b32 s1, s0, 15
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-NEXT: s_lshl_b32 s1, s1, 2
-; GFX11-NEXT: s_add_i32 s0, s0, 0
-; GFX11-NEXT: s_add_i32 s1, s1, 0
; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc
@@ -84,8 +76,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX12-NEXT: s_and_b32 s1, s0, 15
; GFX12-NEXT: s_lshl_b32 s0, s0, 2
; GFX12-NEXT: s_lshl_b32 s1, s1, 2
-; GFX12-NEXT: s_add_co_i32 s0, s0, 0
-; GFX12-NEXT: s_add_co_i32 s1, s1, 0
; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
@@ -1042,13 +1032,13 @@ define void @store_load_large_imm_offset_foo() {
; GFX9-LABEL: store_load_large_imm_offset_foo:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, 13
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
-; GFX9-NEXT: s_add_i32 s1, s32, 4
+; GFX9-NEXT: v_mov_b32_e32 v0, 13
+; GFX9-NEXT: s_add_i32 s0, s32, s0
; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, 15
-; GFX9-NEXT: s_add_i32 s0, s0, s1
+; GFX9-NEXT: s_add_i32 s0, s0, 4
; GFX9-NEXT: scratch_store_dword off, v0, s0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1059,10 +1049,10 @@ define void @store_load_large_imm_offset_foo() {
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, 13
-; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
-; GFX10-NEXT: s_add_i32 s1, s32, 4
-; GFX10-NEXT: s_add_i32 s0, s0, s1
+; GFX10-NEXT: v_mov_b32_e32 v1, 15
+; GFX10-NEXT: s_add_i32 s0, s32, s0
+; GFX10-NEXT: s_add_i32 s0, s0, 4
; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1074,13 +1064,13 @@ define void @store_load_large_imm_offset_foo() {
; GFX940-LABEL: store_load_large_imm_offset_foo:
; GFX940: ; %bb.0: ; %bb
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, 13
; GFX940-NEXT: s_movk_i32 s0, 0x3e80
-; GFX940-NEXT: s_add_i32 s1, s32, 4
+; GFX940-NEXT: v_mov_b32_e32 v0, 13
+; GFX940-NEXT: s_add_i32 s0, s32, s0
; GFX940-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v0, 15
-; GFX940-NEXT: s_add_i32 s0, s0, s1
+; GFX940-NEXT: s_add_i32 s0, s0, 4
; GFX940-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1092,9 +1082,9 @@ define void @store_load_large_imm_offset_foo() {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
; GFX11-NEXT: s_movk_i32 s0, 0x3e80
-; GFX11-NEXT: s_add_i32 s1, s32, 4
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: s_add_i32 s0, s0, s1
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_add_i32 s0, s32, s0
+; GFX11-NEXT: s_add_i32 s0, s0, 4
; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir
index a09b39069e5c9a..f4742bb7f3ebce 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir
@@ -21,21 +21,21 @@ machineFunctionInfo:
body: |
bb.0:
; MUBUFW64-LABEL: name: s_add_i32__inline_imm__fi_offset0
- ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 12, killed $sgpr4, implicit-def dead $scc
+ ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 12, implicit-def dead $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__inline_imm__fi_offset0
- ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 12, killed $sgpr4, implicit-def dead $scc
+ ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 12, implicit-def dead $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW64-LABEL: name: s_add_i32__inline_imm__fi_offset0
- ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 12, $sgpr32, implicit-def dead $scc
+ ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 12, implicit-def dead $scc
; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW32-LABEL: name: s_add_i32__inline_imm__fi_offset0
- ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 12, $sgpr32, implicit-def dead $scc
+ ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 12, implicit-def dead $scc
; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
renamable $sgpr7 = S_ADD_I32 12, %stack.0, implicit-def dead $scc
SI_RETURN implicit $sgpr7
@@ -54,21 +54,21 @@ machineFunctionInfo:
body: |
bb.0:
; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__inline_imm
- ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 12, implicit-def dead $scc
+ ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 12, $sgpr7, implicit-def dead $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__inline_imm
- ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 12, implicit-def dead $scc
+ ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 12, $sgpr7, implicit-def dead $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__inline_imm
- ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 12, implicit-def dead $scc
+ ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 12, $sgpr32, implicit-def dead $scc
; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW32-LABEL: name: s_add_i32__fi_offset0__inline_imm
- ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 12, implicit-def dead $scc
+ ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 12, $sgpr32, implicit-def dead $scc
; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
renamable $sgpr7 = S_ADD_I32 %stack.0, 12, implicit-def dead $scc
SI_RETURN implicit $sgpr7
@@ -88,25 +88,21 @@ machineFunctionInfo:
body: |
bb.0:
; MUBUFW64-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm
- ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
- ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 16, implicit-def $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 12, killed $sgpr4, implicit-def $scc
+ ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 28, implicit-def $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm
- ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
- ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 16, implicit-def $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 12, killed $sgpr4, implicit-def $scc
+ ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 28, implicit-def $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW64-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm
- ; FLATSCRW64: $sgpr4 = S_ADD_I32 $sgpr32, 16, implicit-def $scc
- ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 12, killed $sgpr4, implicit-def $scc
+ ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 28, implicit-def $scc
; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW32-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm
- ; FLATSCRW32: $sgpr4 = S_ADD_I32 $sgpr32, 16, implicit-def $scc
- ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 12, killed $sgpr4, implicit-def $scc
+ ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 28, implicit-def $scc
; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
renamable $sgpr7 = S_ADD_I32 12, %stack.1, implicit-def $scc
SI_RETURN implicit $sgpr7
@@ -125,21 +121,21 @@ machineFunctionInfo:
body: |
bb.0:
; MUBUFW64-LABEL: name: s_add_i32__literal__fi_offset0
- ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 68, killed $sgpr4, implicit-def dead $scc
+ ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 68, implicit-def dead $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__literal__fi_offset0
- ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 68, killed $sgpr4, implicit-def dead $scc
+ ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 68, implicit-def dead $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW64-LABEL: name: s_add_i32__literal__fi_offset0
- ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 68, $sgpr32, implicit-def dead $scc
+ ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 68, implicit-def dead $scc
; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW32-LABEL: name: s_add_i32__literal__fi_offset0
- ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 68, $sgpr32, implicit-def dead $scc
+ ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 68, implicit-def dead $scc
; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
renamable $sgpr7 = S_ADD_I32 68, %stack.0, implicit-def dead $scc
SI_RETURN implicit $sgpr7
@@ -158,21 +154,21 @@ machineFunctionInfo:
body: |
bb.0:
; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__literal
- ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc
+ ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 68, $sgpr7, implicit-def $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__literal
- ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc
+ ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 68, $sgpr7, implicit-def $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__literal
- ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 68, implicit-def $scc
+ ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 68, $sgpr32, implicit-def $scc
; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW32-LABEL: name: s_add_i32__fi_offset0__literal
- ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 68, implicit-def $scc
+ ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 68, $sgpr32, implicit-def $scc
; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
renamable $sgpr7 = S_ADD_I32 %stack.0, 68, implicit-def $scc
SI_RETURN implicit $sgpr7
@@ -192,25 +188,21 @@ machineFunctionInfo:
body: |
bb.0:
; MUBUFW64-LABEL: name: s_add_i32__literal__fi_offset96
- ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
- ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 96, implicit-def $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 68, killed $sgpr4, implicit-def $scc
+ ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 164, implicit-def $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; MUBUFW32-LABEL: name: s_add_i32__literal__fi_offset96
- ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
- ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 96, implicit-def $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 68, killed $sgpr4, implicit-def $scc
+ ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 164, implicit-def $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW64-LABEL: name: s_add_i32__literal__fi_offset96
- ; FLATSCRW64: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc
- ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 68, killed $sgpr4, implicit-def $scc
+ ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 164, implicit-def $scc
; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW32-LABEL: name: s_add_i32__literal__fi_offset96
- ; FLATSCRW32: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc
- ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 68, killed $sgpr4, implicit-def $scc
+ ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 164, implicit-def $scc
; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
renamable $sgpr7 = S_ADD_I32 68, %stack.1, implicit-def $scc
SI_RETURN implicit $sgpr7, implicit $scc
@@ -230,25 +222,21 @@ machineFunctionInfo:
body: |
bb.0:
; MUBUFW64-LABEL: name: s_add_i32____fi_offset96__literal
- ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
- ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 96, implicit-def $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc
+ ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 164, $sgpr7, implicit-def $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; MUBUFW32-LABEL: name: s_add_i32____fi_offset96__literal
- ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
- ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 96, implicit-def $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc
+ ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 164, $sgpr7, implicit-def $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW64-LABEL: name: s_add_i32____fi_offset96__literal
- ; FLATSCRW64: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc
- ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc
+ ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 164, $sgpr32, implicit-def $scc
; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW32-LABEL: name: s_add_i32____fi_offset96__literal
- ; FLATSCRW32: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc
- ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc
+ ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 164, $sgpr32, implicit-def $scc
; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
renamable $sgpr7 = S_ADD_I32 %stack.1, 68, implicit-def $scc
SI_RETURN implicit $sgpr7, implicit $scc
@@ -270,27 +258,27 @@ body: |
; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_offset0
; MUBUFW64: liveins: $sgpr8
; MUBUFW64-NEXT: {{ $}}
- ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_offset0
; MUBUFW32: liveins: $sgpr8
; MUBUFW32-NEXT: {{ $}}
- ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_offset0
; FLATSCRW64: liveins: $sgpr8
; FLATSCRW64-NEXT: {{ $}}
- ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, $sgpr32, implicit-def dead $scc
+ ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc
; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_offset0
; FLATSCRW32: liveins: $sgpr8
; FLATSCRW32-NEXT: {{ $}}
- ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, $sgpr32, implicit-def dead $scc
+ ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc
; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.0, implicit-def dead $scc
SI_RETURN implicit $sgpr7
@@ -312,15 +300,15 @@ body: |
; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__sgpr
; MUBUFW64: liveins: $sgpr8
; MUBUFW64-NEXT: {{ $}}
- ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__sgpr
; MUBUFW32: liveins: $sgpr8
; MUBUFW32-NEXT: {{ $}}
- ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__sgpr
@@ -355,31 +343,31 @@ body: |
; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset
; MUBUFW64: liveins: $sgpr8
; MUBUFW64-NEXT: {{ $}}
- ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
- ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 80, implicit-def $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 80, implicit-def dead $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset
; MUBUFW32: liveins: $sgpr8
; MUBUFW32-NEXT: {{ $}}
- ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
- ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 80, implicit-def $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 80, implicit-def dead $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset
; FLATSCRW64: liveins: $sgpr8
; FLATSCRW64-NEXT: {{ $}}
- ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 80, implicit-def $scc
- ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def dead $scc
+ ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc
+ ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 80, implicit-def dead $scc
; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset
; FLATSCRW32: liveins: $sgpr8
; FLATSCRW32-NEXT: {{ $}}
- ; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 80, implicit-def $scc
- ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def dead $scc
+ ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc
+ ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 80, implicit-def dead $scc
; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.1, implicit-def dead $scc
SI_RETURN implicit $sgpr7
@@ -402,71 +390,117 @@ body: |
; MUBUFW64-LABEL: name: s_add_i32__fi_literal_offset__sgpr
; MUBUFW64: liveins: $sgpr8
; MUBUFW64-NEXT: {{ $}}
- ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
- ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 80, implicit-def $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 80, $sgpr7, implicit-def $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; MUBUFW32-LABEL: name: s_add_i32__fi_literal_offset__sgpr
; MUBUFW32: liveins: $sgpr8
; MUBUFW32-NEXT: {{ $}}
- ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
- ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 80, implicit-def $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 80, $sgpr7, implicit-def $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW64-LABEL: name: s_add_i32__fi_literal_offset__sgpr
; FLATSCRW64: liveins: $sgpr8
; FLATSCRW64-NEXT: {{ $}}
- ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 80, implicit-def $scc
- ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc
+ ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def $scc
+ ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 80, $sgpr7, implicit-def $scc
; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW32-LABEL: name: s_add_i32__fi_literal_offset__sgpr
; FLATSCRW32: liveins: $sgpr8
; FLATSCRW32-NEXT: {{ $}}
- ; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 80, implicit-def $scc
- ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc
+ ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def $scc
+ ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 80, $sgpr7, implicit-def $scc
; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
renamable $sgpr7 = S_ADD_I32 %stack.1, $sgpr8, implicit-def $scc
SI_RETURN implicit $sgpr7, implicit $scc
...
-# FIXME: Fail verifier
-# ---
-# name: s_add_i32__kernel__literal__fi_offset96__offset_literal
-# tracksRegLiveness: true
-# stack:
-# - { id: 0, size: 96, alignment: 16 }
-# - { id: 1, size: 128, alignment: 4 }
-# machineFunctionInfo:
-# scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
-# frameOffsetReg: '$sgpr33'
-# stackPtrOffsetReg: '$sgpr32'
-# isEntryFunction: true
-# body: |
-# bb.0:
-# renamable $sgpr7 = S_ADD_I32 68, %stack.1, implicit-def dead $scc
-# SI_RETURN implicit $sgpr7
-# ...
-
-# ---
-# name: s_add_i32__kernel__fi_offset96__offset_literal__literal
-# tracksRegLiveness: true
-# stack:
-# - { id: 0, size: 96, alignment: 16 }
-# - { id: 1, size: 128, alignment: 4 }
-# machineFunctionInfo:
-# scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
-# frameOffsetReg: '$sgpr33'
-# stackPtrOffsetReg: '$sgpr32'
-# isEntryFunction: true
-# body: |
-# bb.0:
-# renamable $sgpr7 = S_ADD_I32 %stack.1, 68, implicit-def $scc
-# SI_RETURN implicit $sgpr7, implicit $scc
-
-# ...
+
+---
+name: s_add_i32__kernel__literal__fi_offset96__offset_literal
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 96, alignment: 16 }
+ - { id: 1, size: 128, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ ; MUBUFW64-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal
+ ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 164
+ ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
+ ;
+ ; MUBUFW32-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal
+ ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 164
+ ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
+ ;
+ ; FLATSCRW64-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal
+ ; FLATSCRW64: renamable $sgpr7 = S_MOV_B32 164
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
+ ;
+ ; FLATSCRW32-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal
+ ; FLATSCRW32: renamable $sgpr7 = S_MOV_B32 164
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
+ renamable $sgpr7 = S_ADD_I32 68, %stack.1, implicit-def dead $scc
+ SI_RETURN implicit $sgpr7
+...
+
+---
+name: s_add_i32__kernel__fi_offset96__offset_literal__literal
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 96, alignment: 16 }
+ - { id: 1, size: 128, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ ; MUBUFW64-LABEL: name: s_add_i32__kernel__fi_offset96__offset_literal__literal
+ ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 164, implicit-def $scc
+ ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
+ ;
+ ; MUBUFW32-LABEL: name: s_add_i32__kernel__fi_offset96__offset_literal__literal
+ ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 164, implicit-def $scc
+ ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
+ ;
+ ; FLATSCRW64-LABEL: name: s_add_i32__kernel__fi_offset96__offset_literal__literal
+ ; FLATSCRW64: renamable $sgpr7 = S_MOV_B32 164, implicit-def $scc
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
+ ;
+ ; FLATSCRW32-LABEL: name: s_add_i32__kernel__fi_offset96__offset_literal__literal
+ ; FLATSCRW32: renamable $sgpr7 = S_MOV_B32 164, implicit-def $scc
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
+ renamable $sgpr7 = S_ADD_I32 %stack.1, 68, implicit-def $scc
+ SI_RETURN implicit $sgpr7, implicit $scc
+
+...
---
name: s_add_i32__kernel__sgpr__fi_literal_offset
@@ -620,27 +654,31 @@ body: |
; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc
; MUBUFW64: liveins: $sgpr8
; MUBUFW64-NEXT: {{ $}}
- ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 0, implicit-def $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc
; MUBUFW32: liveins: $sgpr8
; MUBUFW32-NEXT: {{ $}}
- ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 0, implicit-def $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc
; FLATSCRW64: liveins: $sgpr8
; FLATSCRW64-NEXT: {{ $}}
- ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, $sgpr32, implicit-def $scc
+ ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def $scc
+ ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 0, implicit-def $scc
; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc
; FLATSCRW32: liveins: $sgpr8
; FLATSCRW32-NEXT: {{ $}}
- ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, $sgpr32, implicit-def $scc
+ ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def $scc
+ ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 0, implicit-def $scc
; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.0, implicit-def $scc
SI_RETURN implicit $sgpr7, implicit $scc
@@ -709,31 +747,31 @@ body: |
; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc
; MUBUFW64: liveins: $sgpr8
; MUBUFW64-NEXT: {{ $}}
- ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
- ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 96, implicit-def $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 96, implicit-def $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc
; MUBUFW32: liveins: $sgpr8
; MUBUFW32-NEXT: {{ $}}
- ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
- ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 96, implicit-def $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 96, implicit-def $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc
; FLATSCRW64: liveins: $sgpr8
; FLATSCRW64-NEXT: {{ $}}
- ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc
- ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def $scc
+ ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def $scc
+ ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 96, implicit-def $scc
; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc
; FLATSCRW32: liveins: $sgpr8
; FLATSCRW32-NEXT: {{ $}}
- ; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc
- ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def $scc
+ ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def $scc
+ ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 96, implicit-def $scc
; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.1, implicit-def $scc
SI_RETURN implicit $sgpr7, implicit $scc
@@ -754,25 +792,21 @@ machineFunctionInfo:
body: |
bb.0:
; MUBUFW64-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm
- ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
- ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 32, implicit-def $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 8, killed $sgpr4, implicit-def $scc
+ ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 40, implicit-def $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; MUBUFW32-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm
- ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
- ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 32, implicit-def $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 8, killed $sgpr4, implicit-def $scc
+ ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 40, implicit-def $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW64-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm
- ; FLATSCRW64: $sgpr4 = S_ADD_I32 $sgpr32, 32, implicit-def $scc
- ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 8, killed $sgpr4, implicit-def $scc
+ ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 40, implicit-def $scc
; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW32-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm
- ; FLATSCRW32: $sgpr4 = S_ADD_I32 $sgpr32, 32, implicit-def $scc
- ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 8, killed $sgpr4, implicit-def $scc
+ ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 40, implicit-def $scc
; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
renamable $sgpr7 = S_ADD_I32 8, %stack.1, implicit-def $scc
SI_RETURN implicit $sgpr7, implicit $scc
@@ -792,25 +826,21 @@ machineFunctionInfo:
body: |
bb.0:
; MUBUFW64-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm
- ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
- ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 32, implicit-def $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 8, implicit-def $scc
+ ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 40, $sgpr7, implicit-def $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; MUBUFW32-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm
- ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
- ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 32, implicit-def $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 8, implicit-def $scc
+ ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 40, $sgpr7, implicit-def $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW64-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm
- ; FLATSCRW64: $sgpr4 = S_ADD_I32 $sgpr32, 32, implicit-def $scc
- ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 8, implicit-def $scc
+ ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 40, $sgpr32, implicit-def $scc
; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW32-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm
- ; FLATSCRW32: $sgpr4 = S_ADD_I32 $sgpr32, 32, implicit-def $scc
- ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 8, implicit-def $scc
+ ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 40, $sgpr32, implicit-def $scc
; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
renamable $sgpr7 = S_ADD_I32 %stack.1, 8, implicit-def $scc
SI_RETURN implicit $sgpr7, implicit $scc
@@ -835,7 +865,7 @@ body: |
; MUBUFW64-NEXT: {{ $}}
; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 8, 32, implicit-def $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 40, implicit-def $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; MUBUFW32-LABEL: name: s_add_i32__kernel_inlineimm__fi_offset_32__total_offset_inlineimm
@@ -843,15 +873,15 @@ body: |
; MUBUFW32-NEXT: {{ $}}
; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 8, 32, implicit-def $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 40, implicit-def $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW64-LABEL: name: s_add_i32__kernel_inlineimm__fi_offset_32__total_offset_inlineimm
- ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 8, 32, implicit-def $scc
+ ; FLATSCRW64: renamable $sgpr7 = S_MOV_B32 40, implicit-def $scc
; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW32-LABEL: name: s_add_i32__kernel_inlineimm__fi_offset_32__total_offset_inlineimm
- ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 8, 32, implicit-def $scc
+ ; FLATSCRW32: renamable $sgpr7 = S_MOV_B32 40, implicit-def $scc
; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
renamable $sgpr7 = S_ADD_I32 8, %stack.1, implicit-def $scc
SI_RETURN implicit $sgpr7, implicit $scc
@@ -876,7 +906,7 @@ body: |
; MUBUFW64-NEXT: {{ $}}
; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 32, 8, implicit-def $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 40, implicit-def $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; MUBUFW32-LABEL: name: s_add_i32__kernel_fi_offset_32__inlineimm__total_offset_inlineimm
@@ -884,15 +914,15 @@ body: |
; MUBUFW32-NEXT: {{ $}}
; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 32, 8, implicit-def $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 40, implicit-def $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW64-LABEL: name: s_add_i32__kernel_fi_offset_32__inlineimm__total_offset_inlineimm
- ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 32, 8, implicit-def $scc
+ ; FLATSCRW64: renamable $sgpr7 = S_MOV_B32 40, implicit-def $scc
; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW32-LABEL: name: s_add_i32__kernel_fi_offset_32__inlineimm__total_offset_inlineimm
- ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 32, 8, implicit-def $scc
+ ; FLATSCRW32: renamable $sgpr7 = S_MOV_B32 40, implicit-def $scc
; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
renamable $sgpr7 = S_ADD_I32 %stack.1, 8, implicit-def $scc
SI_RETURN implicit $sgpr7, implicit $scc
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
index 89da9b8e75bc9c..9d9d5b239a12c8 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
@@ -101,7 +101,6 @@ define amdgpu_kernel void @soff1_voff1(i32 %soff) {
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -237,7 +236,6 @@ define amdgpu_kernel void @soff1_voff2(i32 %soff) {
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -375,7 +373,6 @@ define amdgpu_kernel void @soff1_voff4(i32 %soff) {
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -514,8 +511,6 @@ define amdgpu_kernel void @soff2_voff1(i32 %soff) {
; GFX12-SDAG-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 1
-; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -652,11 +647,10 @@ define amdgpu_kernel void @soff2_voff2(i32 %soff) {
; GFX12-SDAG-NEXT: s_load_b32 s0, s[2:3], 0x24
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
-; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 1
-; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -795,11 +789,10 @@ define amdgpu_kernel void @soff2_voff4(i32 %soff) {
; GFX12-SDAG-NEXT: s_load_b32 s0, s[2:3], 0x24
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
-; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 1
-; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -939,8 +932,6 @@ define amdgpu_kernel void @soff4_voff1(i32 %soff) {
; GFX12-SDAG-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 2
-; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -1077,11 +1068,10 @@ define amdgpu_kernel void @soff4_voff2(i32 %soff) {
; GFX12-SDAG-NEXT: s_load_b32 s0, s[2:3], 0x24
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
-; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 2
-; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -1219,11 +1209,10 @@ define amdgpu_kernel void @soff4_voff4(i32 %soff) {
; GFX12-SDAG-NEXT: s_load_b32 s0, s[2:3], 0x24
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
-; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 2
-; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index 14d8b71c5167a2..9ddaa52234daa2 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -381,11 +381,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_lshl_b32 s1, s0, 2
; GFX9-NEXT: s_and_b32 s0, s0, 15
-; GFX9-NEXT: s_add_i32 s1, s1, 0
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-NEXT: scratch_store_dword off, v0, s1
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_add_i32 s0, s0, 0
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
@@ -402,8 +400,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX10-NEXT: s_and_b32 s1, s0, 15
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
-; GFX10-NEXT: s_add_i32 s0, s0, 0
-; GFX10-NEXT: s_add_i32 s1, s1, 0
; GFX10-NEXT: scratch_store_dword off, v0, s0
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -418,8 +414,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX11-NEXT: s_and_b32 s1, s0, 15
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-NEXT: s_lshl_b32 s1, s1, 2
-; GFX11-NEXT: s_add_i32 s0, s0, 0
-; GFX11-NEXT: s_add_i32 s1, s1, 0
; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc
@@ -434,8 +428,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX12-NEXT: s_and_b32 s1, s0, 15
; GFX12-NEXT: s_lshl_b32 s0, s0, 2
; GFX12-NEXT: s_lshl_b32 s1, s1, 2
-; GFX12-NEXT: s_add_co_i32 s0, s0, 0
-; GFX12-NEXT: s_add_co_i32 s1, s1, 0
; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
@@ -455,11 +447,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s11, 0
; GFX9-PAL-NEXT: s_lshl_b32 s1, s0, 2
; GFX9-PAL-NEXT: s_and_b32 s0, s0, 15
-; GFX9-PAL-NEXT: s_add_i32 s1, s1, 0
; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-PAL-NEXT: s_add_i32 s0, s0, 0
; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
@@ -471,11 +461,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: s_lshl_b32 s1, s0, 2
; GFX940-NEXT: s_and_b32 s0, s0, 15
-; GFX940-NEXT: s_add_i32 s1, s1, 0
; GFX940-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: s_add_i32 s0, s0, 0
; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_endpgm
@@ -497,8 +485,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX10-PAL-NEXT: s_and_b32 s1, s0, 15
; GFX10-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX10-PAL-NEXT: s_lshl_b32 s1, s1, 2
-; GFX10-PAL-NEXT: s_add_i32 s0, s0, 0
-; GFX10-PAL-NEXT: s_add_i32 s1, s1, 0
; GFX10-PAL-NEXT: scratch_store_dword off, v0, s0
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -513,8 +499,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX11-PAL-NEXT: s_and_b32 s1, s0, 15
; GFX11-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-PAL-NEXT: s_lshl_b32 s1, s1, 2
-; GFX11-PAL-NEXT: s_add_i32 s0, s0, 0
-; GFX11-PAL-NEXT: s_add_i32 s1, s1, 0
; GFX11-PAL-NEXT: scratch_store_b32 off, v0, s0 dlc
; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s1 glc dlc
@@ -529,8 +513,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX12-PAL-NEXT: s_and_b32 s1, s0, 15
; GFX12-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX12-PAL-NEXT: s_lshl_b32 s1, s1, 2
-; GFX12-PAL-NEXT: s_add_co_i32 s0, s0, 0
-; GFX12-PAL-NEXT: s_add_co_i32 s1, s1, 0
; GFX12-PAL-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
; GFX12-PAL-NEXT: s_wait_storecnt 0x0
; GFX12-PAL-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
@@ -552,13 +534,11 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
; GFX9-NEXT: s_lshl_b32 s0, s2, 2
-; GFX9-NEXT: s_add_i32 s0, s0, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 15
; GFX9-NEXT: scratch_store_dword off, v0, s0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_and_b32 s0, s2, 15
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
-; GFX9-NEXT: s_add_i32 s0, s0, 0
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
@@ -573,8 +553,6 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX10-NEXT: s_and_b32 s0, s2, 15
; GFX10-NEXT: s_lshl_b32 s1, s2, 2
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
-; GFX10-NEXT: s_add_i32 s1, s1, 0
-; GFX10-NEXT: s_add_i32 s0, s0, 0
; GFX10-NEXT: scratch_store_dword off, v0, s1
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_load_dword v0, off, s0 glc dlc
@@ -587,8 +565,6 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX11-NEXT: s_and_b32 s1, s0, 15
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-NEXT: s_lshl_b32 s1, s1, 2
-; GFX11-NEXT: s_add_i32 s0, s0, 0
-; GFX11-NEXT: s_add_i32 s1, s1, 0
; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc
@@ -601,8 +577,6 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX12-NEXT: s_and_b32 s1, s0, 15
; GFX12-NEXT: s_lshl_b32 s0, s0, 2
; GFX12-NEXT: s_lshl_b32 s1, s1, 2
-; GFX12-NEXT: s_add_co_i32 s0, s0, 0
-; GFX12-NEXT: s_add_co_i32 s1, s1, 0
; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
@@ -621,11 +595,9 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
; GFX9-PAL-NEXT: s_lshl_b32 s1, s0, 2
; GFX9-PAL-NEXT: s_and_b32 s0, s0, 15
-; GFX9-PAL-NEXT: s_add_i32 s1, s1, 0
-; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-PAL-NEXT: s_add_i32 s0, s0, 0
+; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
@@ -633,13 +605,11 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX940-LABEL: store_load_sindex_foo:
; GFX940: ; %bb.0: ; %bb
; GFX940-NEXT: s_lshl_b32 s1, s0, 2
-; GFX940-NEXT: s_and_b32 s0, s0, 15
-; GFX940-NEXT: s_add_i32 s1, s1, 0
; GFX940-NEXT: v_mov_b32_e32 v0, 15
-; GFX940-NEXT: s_lshl_b32 s0, s0, 2
+; GFX940-NEXT: s_and_b32 s0, s0, 15
; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: s_add_i32 s0, s0, 0
+; GFX940-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_endpgm
@@ -659,8 +629,6 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX10-PAL-NEXT: s_and_b32 s1, s0, 15
; GFX10-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX10-PAL-NEXT: s_lshl_b32 s1, s1, 2
-; GFX10-PAL-NEXT: s_add_i32 s0, s0, 0
-; GFX10-PAL-NEXT: s_add_i32 s1, s1, 0
; GFX10-PAL-NEXT: scratch_store_dword off, v0, s0
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -673,8 +641,6 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX11-PAL-NEXT: s_and_b32 s1, s0, 15
; GFX11-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-PAL-NEXT: s_lshl_b32 s1, s1, 2
-; GFX11-PAL-NEXT: s_add_i32 s0, s0, 0
-; GFX11-PAL-NEXT: s_add_i32 s1, s1, 0
; GFX11-PAL-NEXT: scratch_store_b32 off, v0, s0 dlc
; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s1 glc dlc
@@ -687,8 +653,6 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX12-PAL-NEXT: s_and_b32 s1, s0, 15
; GFX12-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX12-PAL-NEXT: s_lshl_b32 s1, s1, 2
-; GFX12-PAL-NEXT: s_add_co_i32 s0, s0, 0
-; GFX12-PAL-NEXT: s_add_co_i32 s1, s1, 0
; GFX12-PAL-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
; GFX12-PAL-NEXT: s_wait_storecnt 0x0
; GFX12-PAL-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
@@ -3693,12 +3657,12 @@ define void @store_load_large_imm_offset_foo() {
; GFX9-LABEL: store_load_large_imm_offset_foo:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, 13
; GFX9-NEXT: s_movk_i32 s0, 0x3000
-; GFX9-NEXT: s_add_i32 s1, s32, 4
+; GFX9-NEXT: v_mov_b32_e32 v0, 13
+; GFX9-NEXT: s_add_i32 s0, s32, s0
; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_add_i32 s0, s0, s1
+; GFX9-NEXT: s_add_i32 s0, s0, 4
; GFX9-NEXT: v_mov_b32_e32 v0, 15
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -3710,10 +3674,10 @@ define void @store_load_large_imm_offset_foo() {
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, 13
-; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: s_movk_i32 s0, 0x3800
-; GFX10-NEXT: s_add_i32 s1, s32, 4
-; GFX10-NEXT: s_add_i32 s0, s0, s1
+; GFX10-NEXT: v_mov_b32_e32 v1, 15
+; GFX10-NEXT: s_add_i32 s0, s32, s0
+; GFX10-NEXT: s_add_i32 s0, s0, 4
; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664
@@ -3755,12 +3719,12 @@ define void @store_load_large_imm_offset_foo() {
; GFX9-PAL-LABEL: store_load_large_imm_offset_foo:
; GFX9-PAL: ; %bb.0: ; %bb
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 13
; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3000
-; GFX9-PAL-NEXT: s_add_i32 s1, s32, 4
+; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 13
+; GFX9-PAL-NEXT: s_add_i32 s0, s32, s0
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-PAL-NEXT: s_add_i32 s0, s0, s1
+; GFX9-PAL-NEXT: s_add_i32 s0, s0, 4
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
@@ -3786,10 +3750,10 @@ define void @store_load_large_imm_offset_foo() {
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 13
-; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
; GFX10-PAL-NEXT: s_movk_i32 s0, 0x3800
-; GFX10-PAL-NEXT: s_add_i32 s1, s32, 4
-; GFX10-PAL-NEXT: s_add_i32 s0, s0, s1
+; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
+; GFX10-PAL-NEXT: s_add_i32 s0, s32, s0
+; GFX10-PAL-NEXT: s_add_i32 s0, s0, 4
; GFX10-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir
index 34c7614ae36f98..132f018548bd72 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir
@@ -55,8 +55,8 @@ body: |
; GCN-LABEL: name: func_add_constant_to_fi_uniform_i32
; GCN: liveins: $sgpr30_sgpr31
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $sgpr0 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_I32 killed $sgpr0, 4, implicit-def dead $scc
+ ; GCN-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_I32 4, $sgpr4, implicit-def dead $scc
; GCN-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr4, implicit $exec
; GCN-NEXT: $m0 = S_MOV_B32 -1
; GCN-NEXT: DS_WRITE_B32 undef renamable $vgpr0, killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index 49531e3b4f8f30..330fcfd45dd69f 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -161,8 +161,8 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) {
; FLATSCR-NEXT: s_mov_b32 s0, 0
; FLATSCR-NEXT: .LBB1_1: ; %loadstoreloop
; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1
-; FLATSCR-NEXT: s_add_i32 s3, s33, 0x3000
-; FLATSCR-NEXT: s_add_i32 s1, s0, s3
+; FLATSCR-NEXT: s_add_i32 s1, s33, s0
+; FLATSCR-NEXT: s_addk_i32 s1, 0x3000
; FLATSCR-NEXT: s_add_i32 s0, s0, 1
; FLATSCR-NEXT: s_cmpk_lt_u32 s0, 0x2120
; FLATSCR-NEXT: scratch_store_byte off, v2, s1
@@ -170,8 +170,8 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) {
; FLATSCR-NEXT: s_cbranch_scc1 .LBB1_1
; FLATSCR-NEXT: ; %bb.2: ; %split
; FLATSCR-NEXT: s_movk_i32 s0, 0x2000
-; FLATSCR-NEXT: s_add_i32 s1, s33, 0x3000
-; FLATSCR-NEXT: s_add_i32 s0, s0, s1
+; FLATSCR-NEXT: s_add_i32 s0, s33, s0
+; FLATSCR-NEXT: s_addk_i32 s0, 0x3000
; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s0 offset:208 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_add_i32 s0, s33, 0x3000
>From 90ddda8652fb18d3d00a1f7868bd2b8eedd12c3c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 7 Aug 2024 00:28:08 +0400
Subject: [PATCH 2/4] Fix offset 0 case with different register
---
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 7 +-
.../CodeGen/AMDGPU/GlobalISel/flat-scratch.ll | 10 +
.../eliminate-frame-index-s-add-i32.mir | 212 ++++++++++++++++++
llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll | 19 +-
llvm/test/CodeGen/AMDGPU/flat-scratch.ll | 42 +++-
5 files changed, 281 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 43054329304f06..bfdc313a17e35d 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2310,8 +2310,11 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (OtherOp.isImm())
NewOffset += OtherOp.getImm();
- if (NewOffset == 0 && DeadSCC) {
- MI->eraseFromParent();
+ if (NewOffset == 0 && DeadSCC && DstOp.getReg() == MaterializedReg) {
+ MI->removeOperand(3);
+ MI->removeOperand(FIOperandNum);
+ MI->setDesc(
+ TII->get(OtherOp.isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
} else if (!MaterializedReg && OtherOp.isImm()) {
// In a kernel, the address should just be an immediate.
// SCC should really be dead, but preserve the def just in case it
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
index 5ea88ee6b33aef..83f2329feb8f23 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
@@ -15,9 +15,11 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_lshl_b32 s1, s0, 2
; GFX9-NEXT: s_and_b32 s0, s0, 15
+; GFX9-NEXT: s_add_i32 s1, s1, 0
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-NEXT: scratch_store_dword off, v0, s1
; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_add_i32 s0, s0, 0
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
@@ -34,6 +36,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX10-NEXT: s_and_b32 s1, s0, 15
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
+; GFX10-NEXT: s_add_i32 s0, s0, 0
+; GFX10-NEXT: s_add_i32 s1, s1, 0
; GFX10-NEXT: scratch_store_dword off, v0, s0
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -47,9 +51,11 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: s_lshl_b32 s1, s0, 2
; GFX940-NEXT: s_and_b32 s0, s0, 15
+; GFX940-NEXT: s_add_i32 s1, s1, 0
; GFX940-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_add_i32 s0, s0, 0
; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_endpgm
@@ -62,6 +68,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX11-NEXT: s_and_b32 s1, s0, 15
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-NEXT: s_lshl_b32 s1, s1, 2
+; GFX11-NEXT: s_add_i32 s0, s0, 0
+; GFX11-NEXT: s_add_i32 s1, s1, 0
; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc
@@ -76,6 +84,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX12-NEXT: s_and_b32 s1, s0, 15
; GFX12-NEXT: s_lshl_b32 s0, s0, 2
; GFX12-NEXT: s_lshl_b32 s1, s1, 2
+; GFX12-NEXT: s_add_co_i32 s0, s0, 0
+; GFX12-NEXT: s_add_co_i32 s1, s1, 0
; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir
index f4742bb7f3ebce..62581879af03f9 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir
@@ -260,6 +260,7 @@ body: |
; MUBUFW64-NEXT: {{ $}}
; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = COPY $sgpr8
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_offset0
@@ -267,18 +268,21 @@ body: |
; MUBUFW32-NEXT: {{ $}}
; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = COPY $sgpr8
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_offset0
; FLATSCRW64: liveins: $sgpr8
; FLATSCRW64-NEXT: {{ $}}
; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc
+ ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY $sgpr8
; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_offset0
; FLATSCRW32: liveins: $sgpr8
; FLATSCRW32-NEXT: {{ $}}
; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc
+ ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY $sgpr8
; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.0, implicit-def dead $scc
SI_RETURN implicit $sgpr7
@@ -302,6 +306,7 @@ body: |
; MUBUFW64-NEXT: {{ $}}
; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = COPY $sgpr8
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__sgpr
@@ -309,18 +314,21 @@ body: |
; MUBUFW32-NEXT: {{ $}}
; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = COPY $sgpr8
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__sgpr
; FLATSCRW64: liveins: $sgpr8
; FLATSCRW64-NEXT: {{ $}}
; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc
+ ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY $sgpr8
; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW32-LABEL: name: s_add_i32__fi_offset0__sgpr
; FLATSCRW32: liveins: $sgpr8
; FLATSCRW32-NEXT: {{ $}}
; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc
+ ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY $sgpr8
; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
renamable $sgpr7 = S_ADD_I32 %stack.0, $sgpr8, implicit-def dead $scc
SI_RETURN implicit $sgpr7
@@ -928,3 +936,207 @@ body: |
SI_RETURN implicit $sgpr7, implicit $scc
...
+
+---
+name: s_add_i32__0__fi_offset0
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; MUBUFW64-LABEL: name: s_add_i32__0__fi_offset0
+ ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 0
+ ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
+ ;
+ ; MUBUFW32-LABEL: name: s_add_i32__0__fi_offset0
+ ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 0
+ ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
+ ;
+ ; FLATSCRW64-LABEL: name: s_add_i32__0__fi_offset0
+ ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 0, implicit-def dead $scc
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
+ ;
+ ; FLATSCRW32-LABEL: name: s_add_i32__0__fi_offset0
+ ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 0, implicit-def dead $scc
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
+ renamable $sgpr7 = S_ADD_I32 0, %stack.0, implicit-def dead $scc
+ SI_RETURN implicit $sgpr7
+
+...
+
+---
+name: s_add_i32__fi_offset0__0
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__0
+ ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 0
+ ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
+ ;
+ ; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__0
+ ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 0
+ ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
+ ;
+ ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__0
+ ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 0, $sgpr32, implicit-def dead $scc
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
+ ;
+ ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset0__0
+ ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 0, $sgpr32, implicit-def dead $scc
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
+ renamable $sgpr7 = S_ADD_I32 %stack.0, 0, implicit-def dead $scc
+ SI_RETURN implicit $sgpr7
+
+...
+
+---
+name: s_add_i32__same_sgpr__fi_offset0
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr7
+ ; MUBUFW64-LABEL: name: s_add_i32__same_sgpr__fi_offset0
+ ; MUBUFW64: liveins: $sgpr7
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr7, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = COPY $sgpr7
+ ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
+ ;
+ ; MUBUFW32-LABEL: name: s_add_i32__same_sgpr__fi_offset0
+ ; MUBUFW32: liveins: $sgpr7
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr7, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = COPY $sgpr7
+ ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
+ ;
+ ; FLATSCRW64-LABEL: name: s_add_i32__same_sgpr__fi_offset0
+ ; FLATSCRW64: liveins: $sgpr7
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr7, implicit-def dead $scc
+ ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY $sgpr7
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
+ ;
+ ; FLATSCRW32-LABEL: name: s_add_i32__same_sgpr__fi_offset0
+ ; FLATSCRW32: liveins: $sgpr7
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr7, implicit-def dead $scc
+ ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY $sgpr7
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
+ renamable $sgpr7 = S_ADD_I32 $sgpr7, %stack.0, implicit-def dead $scc
+ SI_RETURN implicit $sgpr7
+
+...
+
+---
+name: s_add_i32__different_sgpr__fi_offset0
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr8
+ ; MUBUFW64-LABEL: name: s_add_i32__different_sgpr__fi_offset0
+ ; MUBUFW64: liveins: $sgpr8
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = COPY $sgpr8
+ ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
+ ;
+ ; MUBUFW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0
+ ; MUBUFW32: liveins: $sgpr8
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = COPY $sgpr8
+ ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
+ ;
+ ; FLATSCRW64-LABEL: name: s_add_i32__different_sgpr__fi_offset0
+ ; FLATSCRW64: liveins: $sgpr8
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc
+ ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY $sgpr8
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7
+ ;
+ ; FLATSCRW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0
+ ; FLATSCRW32: liveins: $sgpr8
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc
+ ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY $sgpr8
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7
+ renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.0, implicit-def dead $scc
+ SI_RETURN implicit $sgpr7
+
+...
+
+---
+name: s_add_i32__different_sgpr__fi_offset0_live_after
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr8
+ ; MUBUFW64-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after
+ ; MUBUFW64: liveins: $sgpr8
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = COPY $sgpr8
+ ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $sgpr8
+ ;
+ ; MUBUFW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after
+ ; MUBUFW32: liveins: $sgpr8
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = COPY $sgpr8
+ ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $sgpr8
+ ;
+ ; FLATSCRW64-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after
+ ; FLATSCRW64: liveins: $sgpr8
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc
+ ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY $sgpr8
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $sgpr8
+ ;
+ ; FLATSCRW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after
+ ; FLATSCRW32: liveins: $sgpr8
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc
+ ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY $sgpr8
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $sgpr8
+ renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.0, implicit-def dead $scc
+ SI_RETURN implicit $sgpr7, implicit $sgpr8
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
index 9d9d5b239a12c8..89da9b8e75bc9c 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
@@ -101,6 +101,7 @@ define amdgpu_kernel void @soff1_voff1(i32 %soff) {
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -236,6 +237,7 @@ define amdgpu_kernel void @soff1_voff2(i32 %soff) {
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -373,6 +375,7 @@ define amdgpu_kernel void @soff1_voff4(i32 %soff) {
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -511,6 +514,8 @@ define amdgpu_kernel void @soff2_voff1(i32 %soff) {
; GFX12-SDAG-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 1
+; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -647,10 +652,11 @@ define amdgpu_kernel void @soff2_voff2(i32 %soff) {
; GFX12-SDAG-NEXT: s_load_b32 s0, s[2:3], 0x24
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
-; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 1
+; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -789,10 +795,11 @@ define amdgpu_kernel void @soff2_voff4(i32 %soff) {
; GFX12-SDAG-NEXT: s_load_b32 s0, s[2:3], 0x24
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
-; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 1
+; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -932,6 +939,8 @@ define amdgpu_kernel void @soff4_voff1(i32 %soff) {
; GFX12-SDAG-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 2
+; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -1068,10 +1077,11 @@ define amdgpu_kernel void @soff4_voff2(i32 %soff) {
; GFX12-SDAG-NEXT: s_load_b32 s0, s[2:3], 0x24
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
-; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 2
+; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
@@ -1209,10 +1219,11 @@ define amdgpu_kernel void @soff4_voff4(i32 %soff) {
; GFX12-SDAG-NEXT: s_load_b32 s0, s[2:3], 0x24
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
-; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 2
+; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index 9ddaa52234daa2..1c3cccc368bff7 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -381,9 +381,11 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_lshl_b32 s1, s0, 2
; GFX9-NEXT: s_and_b32 s0, s0, 15
+; GFX9-NEXT: s_add_i32 s1, s1, 0
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-NEXT: scratch_store_dword off, v0, s1
; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_add_i32 s0, s0, 0
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
@@ -400,6 +402,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX10-NEXT: s_and_b32 s1, s0, 15
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
+; GFX10-NEXT: s_add_i32 s0, s0, 0
+; GFX10-NEXT: s_add_i32 s1, s1, 0
; GFX10-NEXT: scratch_store_dword off, v0, s0
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -414,6 +418,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX11-NEXT: s_and_b32 s1, s0, 15
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-NEXT: s_lshl_b32 s1, s1, 2
+; GFX11-NEXT: s_add_i32 s0, s0, 0
+; GFX11-NEXT: s_add_i32 s1, s1, 0
; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc
@@ -428,6 +434,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX12-NEXT: s_and_b32 s1, s0, 15
; GFX12-NEXT: s_lshl_b32 s0, s0, 2
; GFX12-NEXT: s_lshl_b32 s1, s1, 2
+; GFX12-NEXT: s_add_co_i32 s0, s0, 0
+; GFX12-NEXT: s_add_co_i32 s1, s1, 0
; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
@@ -447,9 +455,11 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s11, 0
; GFX9-PAL-NEXT: s_lshl_b32 s1, s0, 2
; GFX9-PAL-NEXT: s_and_b32 s0, s0, 15
+; GFX9-PAL-NEXT: s_add_i32 s1, s1, 0
; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-PAL-NEXT: s_add_i32 s0, s0, 0
; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
@@ -461,9 +471,11 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: s_lshl_b32 s1, s0, 2
; GFX940-NEXT: s_and_b32 s0, s0, 15
+; GFX940-NEXT: s_add_i32 s1, s1, 0
; GFX940-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: s_add_i32 s0, s0, 0
; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_endpgm
@@ -485,6 +497,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX10-PAL-NEXT: s_and_b32 s1, s0, 15
; GFX10-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX10-PAL-NEXT: s_lshl_b32 s1, s1, 2
+; GFX10-PAL-NEXT: s_add_i32 s0, s0, 0
+; GFX10-PAL-NEXT: s_add_i32 s1, s1, 0
; GFX10-PAL-NEXT: scratch_store_dword off, v0, s0
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -499,6 +513,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX11-PAL-NEXT: s_and_b32 s1, s0, 15
; GFX11-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-PAL-NEXT: s_lshl_b32 s1, s1, 2
+; GFX11-PAL-NEXT: s_add_i32 s0, s0, 0
+; GFX11-PAL-NEXT: s_add_i32 s1, s1, 0
; GFX11-PAL-NEXT: scratch_store_b32 off, v0, s0 dlc
; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s1 glc dlc
@@ -513,6 +529,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX12-PAL-NEXT: s_and_b32 s1, s0, 15
; GFX12-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX12-PAL-NEXT: s_lshl_b32 s1, s1, 2
+; GFX12-PAL-NEXT: s_add_co_i32 s0, s0, 0
+; GFX12-PAL-NEXT: s_add_co_i32 s1, s1, 0
; GFX12-PAL-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
; GFX12-PAL-NEXT: s_wait_storecnt 0x0
; GFX12-PAL-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
@@ -534,11 +552,13 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
; GFX9-NEXT: s_lshl_b32 s0, s2, 2
+; GFX9-NEXT: s_add_i32 s0, s0, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 15
; GFX9-NEXT: scratch_store_dword off, v0, s0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_and_b32 s0, s2, 15
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
+; GFX9-NEXT: s_add_i32 s0, s0, 0
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
@@ -553,6 +573,8 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX10-NEXT: s_and_b32 s0, s2, 15
; GFX10-NEXT: s_lshl_b32 s1, s2, 2
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
+; GFX10-NEXT: s_add_i32 s1, s1, 0
+; GFX10-NEXT: s_add_i32 s0, s0, 0
; GFX10-NEXT: scratch_store_dword off, v0, s1
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_load_dword v0, off, s0 glc dlc
@@ -565,6 +587,8 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX11-NEXT: s_and_b32 s1, s0, 15
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-NEXT: s_lshl_b32 s1, s1, 2
+; GFX11-NEXT: s_add_i32 s0, s0, 0
+; GFX11-NEXT: s_add_i32 s1, s1, 0
; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc
@@ -577,6 +601,8 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX12-NEXT: s_and_b32 s1, s0, 15
; GFX12-NEXT: s_lshl_b32 s0, s0, 2
; GFX12-NEXT: s_lshl_b32 s1, s1, 2
+; GFX12-NEXT: s_add_co_i32 s0, s0, 0
+; GFX12-NEXT: s_add_co_i32 s1, s1, 0
; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
@@ -595,9 +621,11 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
; GFX9-PAL-NEXT: s_lshl_b32 s1, s0, 2
; GFX9-PAL-NEXT: s_and_b32 s0, s0, 15
+; GFX9-PAL-NEXT: s_add_i32 s1, s1, 0
+; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2
+; GFX9-PAL-NEXT: s_add_i32 s0, s0, 0
; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_endpgm
@@ -605,11 +633,13 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX940-LABEL: store_load_sindex_foo:
; GFX940: ; %bb.0: ; %bb
; GFX940-NEXT: s_lshl_b32 s1, s0, 2
-; GFX940-NEXT: v_mov_b32_e32 v0, 15
; GFX940-NEXT: s_and_b32 s0, s0, 15
+; GFX940-NEXT: s_add_i32 s1, s1, 0
+; GFX940-NEXT: v_mov_b32_e32 v0, 15
+; GFX940-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: s_lshl_b32 s0, s0, 2
+; GFX940-NEXT: s_add_i32 s0, s0, 0
; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_endpgm
@@ -629,6 +659,8 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX10-PAL-NEXT: s_and_b32 s1, s0, 15
; GFX10-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX10-PAL-NEXT: s_lshl_b32 s1, s1, 2
+; GFX10-PAL-NEXT: s_add_i32 s0, s0, 0
+; GFX10-PAL-NEXT: s_add_i32 s1, s1, 0
; GFX10-PAL-NEXT: scratch_store_dword off, v0, s0
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -641,6 +673,8 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX11-PAL-NEXT: s_and_b32 s1, s0, 15
; GFX11-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-PAL-NEXT: s_lshl_b32 s1, s1, 2
+; GFX11-PAL-NEXT: s_add_i32 s0, s0, 0
+; GFX11-PAL-NEXT: s_add_i32 s1, s1, 0
; GFX11-PAL-NEXT: scratch_store_b32 off, v0, s0 dlc
; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s1 glc dlc
@@ -653,6 +687,8 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX12-PAL-NEXT: s_and_b32 s1, s0, 15
; GFX12-PAL-NEXT: s_lshl_b32 s0, s0, 2
; GFX12-PAL-NEXT: s_lshl_b32 s1, s1, 2
+; GFX12-PAL-NEXT: s_add_co_i32 s0, s0, 0
+; GFX12-PAL-NEXT: s_add_co_i32 s1, s1, 0
; GFX12-PAL-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
; GFX12-PAL-NEXT: s_wait_storecnt 0x0
; GFX12-PAL-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
>From a83a1b0dd5ca665c7ce99c14405b203674db196c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 7 Aug 2024 20:34:29 +0400
Subject: [PATCH 3/4] AMDGPU: Add more scalar frame index lowering tests
---
.../eliminate-frame-index-s-add-i32.mir | 282 ++++++++++++++++++
1 file changed, 282 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir
index 62581879af03f9..a659ac6d2bf5ef 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir
@@ -1140,3 +1140,285 @@ body: |
SI_RETURN implicit $sgpr7, implicit $sgpr8
...
+
+---
+name: s_add_i32__identity_sgpr__fi_offset0__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $sgpr8
+
+ ; MUBUFW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset0__kernel
+ ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 0, implicit-def dead $scc
+ ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr8
+ ;
+ ; MUBUFW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset0__kernel
+ ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 0, implicit-def dead $scc
+ ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr8
+ ;
+ ; FLATSCRW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset0__kernel
+ ; FLATSCRW64: liveins: $sgpr8
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 0, implicit-def dead $scc
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8
+ ;
+ ; FLATSCRW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset0__kernel
+ ; FLATSCRW32: liveins: $sgpr8
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 0, implicit-def dead $scc
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8
+ renamable $sgpr8 = S_ADD_I32 $sgpr8, %stack.0, implicit-def dead $scc
+ SI_RETURN implicit $sgpr8
+
+...
+
+---
+name: s_add_i32__fi_offset0__identity_sgpr__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $sgpr8
+
+ ; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__identity_sgpr__kernel
+ ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 0, implicit-def dead $scc
+ ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr8
+ ;
+ ; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__identity_sgpr__kernel
+ ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 0, implicit-def dead $scc
+ ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr8
+ ;
+ ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__identity_sgpr__kernel
+ ; FLATSCRW64: liveins: $sgpr8
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 0, implicit-def dead $scc
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8
+ ;
+ ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset0__identity_sgpr__kernel
+ ; FLATSCRW32: liveins: $sgpr8
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 0, implicit-def dead $scc
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8
+ renamable $sgpr8 = S_ADD_I32 $sgpr8, %stack.0, implicit-def dead $scc
+ SI_RETURN implicit $sgpr8
+
+...
+
+---
+name: s_add_i32__identity_sgpr__fi_offset32__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+ - { id: 1, size: 64, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $sgpr8
+
+ ; MUBUFW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset32__kernel
+ ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc
+ ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr8
+ ;
+ ; MUBUFW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset32__kernel
+ ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc
+ ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr8
+ ;
+ ; FLATSCRW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset32__kernel
+ ; FLATSCRW64: liveins: $sgpr8
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8
+ ;
+ ; FLATSCRW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset32__kernel
+ ; FLATSCRW32: liveins: $sgpr8
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8
+ renamable $sgpr8 = S_ADD_I32 $sgpr8, %stack.1, implicit-def dead $scc
+ SI_RETURN implicit $sgpr8
+
+...
+
+---
+name: s_add_i32__fi_offset32__identity_sgpr__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+ - { id: 1, size: 64, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $sgpr8
+
+ ; MUBUFW64-LABEL: name: s_add_i32__fi_offset32__identity_sgpr__kernel
+ ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc
+ ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr8
+ ;
+ ; MUBUFW32-LABEL: name: s_add_i32__fi_offset32__identity_sgpr__kernel
+ ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc
+ ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr8
+ ;
+ ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset32__identity_sgpr__kernel
+ ; FLATSCRW64: liveins: $sgpr8
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8
+ ;
+ ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset32__identity_sgpr__kernel
+ ; FLATSCRW32: liveins: $sgpr8
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8
+ renamable $sgpr8 = S_ADD_I32 $sgpr8, %stack.1, implicit-def dead $scc
+ SI_RETURN implicit $sgpr8
+
+...
+
+
+---
+name: s_add_i32__identity_sgpr__fi_offset0
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr8
+
+ ; MUBUFW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset0
+ ; MUBUFW64: liveins: $sgpr8
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: renamable $sgpr8 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, $sgpr8, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr8 = COPY $sgpr8
+ ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr8
+ ;
+ ; MUBUFW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset0
+ ; MUBUFW32: liveins: $sgpr8
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: renamable $sgpr8 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, $sgpr8, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr8 = COPY $sgpr8
+ ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr8
+ ;
+ ; FLATSCRW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset0
+ ; FLATSCRW64: liveins: $sgpr8
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc
+ ; FLATSCRW64-NEXT: renamable $sgpr8 = COPY $sgpr8
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8
+ ;
+ ; FLATSCRW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset0
+ ; FLATSCRW32: liveins: $sgpr8
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc
+ ; FLATSCRW32-NEXT: renamable $sgpr8 = COPY $sgpr8
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8
+ renamable $sgpr8 = S_ADD_I32 $sgpr8, %stack.0, implicit-def dead $scc
+ SI_RETURN implicit $sgpr8
+
+...
+
+---
+name: s_add_i32__fi_offset32__identity_sgpr
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+ - { id: 1, size: 64, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr8
+
+ ; MUBUFW64-LABEL: name: s_add_i32__fi_offset32__identity_sgpr
+ ; MUBUFW64: liveins: $sgpr8
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: renamable $sgpr8 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, $sgpr8, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc
+ ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr8
+ ;
+ ; MUBUFW32-LABEL: name: s_add_i32__fi_offset32__identity_sgpr
+ ; MUBUFW32: liveins: $sgpr8
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: renamable $sgpr8 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, $sgpr8, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc
+ ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr8
+ ;
+ ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset32__identity_sgpr
+ ; FLATSCRW64: liveins: $sgpr8
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc
+ ; FLATSCRW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8
+ ;
+ ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset32__identity_sgpr
+ ; FLATSCRW32: liveins: $sgpr8
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc
+ ; FLATSCRW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8
+ renamable $sgpr8 = S_ADD_I32 $sgpr8, %stack.1, implicit-def dead $scc
+ SI_RETURN implicit $sgpr8
+
+...
>From 598c561b6357bcc80129bbda48aba0559f408059 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 7 Aug 2024 21:06:09 +0400
Subject: [PATCH 4/4] Fix clobbering value for reg += fi case
---
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 9 +-
.../eliminate-frame-index-s-add-i32.mir | 176 +++++++++---------
llvm/test/CodeGen/AMDGPU/frame-index.mir | 4 +-
3 files changed, 97 insertions(+), 92 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index bfdc313a17e35d..e8c2cbd3dd671b 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2283,12 +2283,17 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// Do an in-place scale of the wave offset to the lane offset.
if (FrameReg && !ST.enableFlatScratch()) {
+ // FIXME: In the common case where the add does not also read its result
+ // (i.e. this isn't a reg += fi), the register scavenger does not report
+ // the dest reg as available, so a separate temporary is scavenged.
+ Register TmpReg = RS->scavengeRegisterBackwards(
+ AMDGPU::SReg_32_XM0RegClass, MI, false, 0);
BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_LSHR_B32))
- .addDef(DstOp.getReg(), RegState::Renamable)
+ .addDef(TmpReg, RegState::Renamable)
.addReg(FrameReg)
.addImm(ST.getWavefrontSizeLog2())
.setOperandDead(3); // Set SCC dead
- MaterializedReg = DstOp.getReg();
+ MaterializedReg = TmpReg;
}
// If we can't fold the other operand, do another increment.
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir
index a659ac6d2bf5ef..9d44e5fba45337 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir
@@ -21,13 +21,13 @@ machineFunctionInfo:
body: |
bb.0:
; MUBUFW64-LABEL: name: s_add_i32__inline_imm__fi_offset0
- ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 12, implicit-def dead $scc
+ ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 12, implicit-def dead $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__inline_imm__fi_offset0
- ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 12, implicit-def dead $scc
+ ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 12, implicit-def dead $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW64-LABEL: name: s_add_i32__inline_imm__fi_offset0
@@ -54,13 +54,13 @@ machineFunctionInfo:
body: |
bb.0:
; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__inline_imm
- ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 12, $sgpr7, implicit-def dead $scc
+ ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 12, $sgpr4, implicit-def dead $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__inline_imm
- ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 12, $sgpr7, implicit-def dead $scc
+ ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 12, $sgpr4, implicit-def dead $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__inline_imm
@@ -88,13 +88,13 @@ machineFunctionInfo:
body: |
bb.0:
; MUBUFW64-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm
- ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 28, implicit-def $scc
+ ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 28, implicit-def $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm
- ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 28, implicit-def $scc
+ ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 28, implicit-def $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW64-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm
@@ -121,13 +121,13 @@ machineFunctionInfo:
body: |
bb.0:
; MUBUFW64-LABEL: name: s_add_i32__literal__fi_offset0
- ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 68, implicit-def dead $scc
+ ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 68, implicit-def dead $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__literal__fi_offset0
- ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 68, implicit-def dead $scc
+ ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 68, implicit-def dead $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW64-LABEL: name: s_add_i32__literal__fi_offset0
@@ -154,13 +154,13 @@ machineFunctionInfo:
body: |
bb.0:
; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__literal
- ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 68, $sgpr7, implicit-def $scc
+ ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 68, $sgpr4, implicit-def $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__literal
- ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 68, $sgpr7, implicit-def $scc
+ ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 68, $sgpr4, implicit-def $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__literal
@@ -188,13 +188,13 @@ machineFunctionInfo:
body: |
bb.0:
; MUBUFW64-LABEL: name: s_add_i32__literal__fi_offset96
- ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 164, implicit-def $scc
+ ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 164, implicit-def $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; MUBUFW32-LABEL: name: s_add_i32__literal__fi_offset96
- ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 164, implicit-def $scc
+ ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 164, implicit-def $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW64-LABEL: name: s_add_i32__literal__fi_offset96
@@ -222,13 +222,13 @@ machineFunctionInfo:
body: |
bb.0:
; MUBUFW64-LABEL: name: s_add_i32____fi_offset96__literal
- ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 164, $sgpr7, implicit-def $scc
+ ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 164, $sgpr4, implicit-def $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; MUBUFW32-LABEL: name: s_add_i32____fi_offset96__literal
- ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 164, $sgpr7, implicit-def $scc
+ ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 164, $sgpr4, implicit-def $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW64-LABEL: name: s_add_i32____fi_offset96__literal
@@ -258,16 +258,16 @@ body: |
; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_offset0
; MUBUFW64: liveins: $sgpr8
; MUBUFW64-NEXT: {{ $}}
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, $sgpr8, implicit-def dead $scc
; MUBUFW64-NEXT: renamable $sgpr7 = COPY $sgpr8
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_offset0
; MUBUFW32: liveins: $sgpr8
; MUBUFW32-NEXT: {{ $}}
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, $sgpr8, implicit-def dead $scc
; MUBUFW32-NEXT: renamable $sgpr7 = COPY $sgpr8
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
@@ -304,16 +304,16 @@ body: |
; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__sgpr
; MUBUFW64: liveins: $sgpr8
; MUBUFW64-NEXT: {{ $}}
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, $sgpr8, implicit-def dead $scc
; MUBUFW64-NEXT: renamable $sgpr7 = COPY $sgpr8
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__sgpr
; MUBUFW32: liveins: $sgpr8
; MUBUFW32-NEXT: {{ $}}
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, $sgpr8, implicit-def dead $scc
; MUBUFW32-NEXT: renamable $sgpr7 = COPY $sgpr8
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
@@ -351,16 +351,16 @@ body: |
; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset
; MUBUFW64: liveins: $sgpr8
; MUBUFW64-NEXT: {{ $}}
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, $sgpr8, implicit-def dead $scc
; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 80, implicit-def dead $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset
; MUBUFW32: liveins: $sgpr8
; MUBUFW32-NEXT: {{ $}}
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, $sgpr8, implicit-def dead $scc
; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 80, implicit-def dead $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
@@ -398,16 +398,16 @@ body: |
; MUBUFW64-LABEL: name: s_add_i32__fi_literal_offset__sgpr
; MUBUFW64: liveins: $sgpr8
; MUBUFW64-NEXT: {{ $}}
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def $scc
+ ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, $sgpr8, implicit-def $scc
; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 80, $sgpr7, implicit-def $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; MUBUFW32-LABEL: name: s_add_i32__fi_literal_offset__sgpr
; MUBUFW32: liveins: $sgpr8
; MUBUFW32-NEXT: {{ $}}
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def $scc
+ ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, $sgpr8, implicit-def $scc
; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 80, $sgpr7, implicit-def $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
@@ -662,16 +662,16 @@ body: |
; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc
; MUBUFW64: liveins: $sgpr8
; MUBUFW64-NEXT: {{ $}}
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def $scc
+ ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, $sgpr8, implicit-def $scc
; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 0, implicit-def $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc
; MUBUFW32: liveins: $sgpr8
; MUBUFW32-NEXT: {{ $}}
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def $scc
+ ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, $sgpr8, implicit-def $scc
; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 0, implicit-def $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
@@ -755,16 +755,16 @@ body: |
; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc
; MUBUFW64: liveins: $sgpr8
; MUBUFW64-NEXT: {{ $}}
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def $scc
+ ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, $sgpr8, implicit-def $scc
; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 96, implicit-def $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc
; MUBUFW32: liveins: $sgpr8
; MUBUFW32-NEXT: {{ $}}
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def $scc
+ ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, $sgpr8, implicit-def $scc
; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 96, implicit-def $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
@@ -800,13 +800,13 @@ machineFunctionInfo:
body: |
bb.0:
; MUBUFW64-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm
- ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 40, implicit-def $scc
+ ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 40, implicit-def $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; MUBUFW32-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm
- ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, 40, implicit-def $scc
+ ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 40, implicit-def $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW64-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm
@@ -834,13 +834,13 @@ machineFunctionInfo:
body: |
bb.0:
; MUBUFW64-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm
- ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 40, $sgpr7, implicit-def $scc
+ ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 40, $sgpr4, implicit-def $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; MUBUFW32-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm
- ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 40, $sgpr7, implicit-def $scc
+ ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 40, $sgpr4, implicit-def $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc
;
; FLATSCRW64-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm
@@ -949,13 +949,13 @@ machineFunctionInfo:
body: |
bb.0:
; MUBUFW64-LABEL: name: s_add_i32__0__fi_offset0
- ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 0
+ ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 0, implicit-def dead $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__0__fi_offset0
- ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 0
+ ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 0, implicit-def dead $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW64-LABEL: name: s_add_i32__0__fi_offset0
@@ -982,13 +982,13 @@ machineFunctionInfo:
body: |
bb.0:
; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__0
- ; MUBUFW64: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 0
+ ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 0, $sgpr4, implicit-def dead $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__0
- ; MUBUFW32: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 0
+ ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 0, $sgpr4, implicit-def dead $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__0
@@ -1018,16 +1018,16 @@ body: |
; MUBUFW64-LABEL: name: s_add_i32__same_sgpr__fi_offset0
; MUBUFW64: liveins: $sgpr7
; MUBUFW64-NEXT: {{ $}}
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr7, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, $sgpr7, implicit-def dead $scc
; MUBUFW64-NEXT: renamable $sgpr7 = COPY $sgpr7
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__same_sgpr__fi_offset0
; MUBUFW32: liveins: $sgpr7
; MUBUFW32-NEXT: {{ $}}
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr7, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, $sgpr7, implicit-def dead $scc
; MUBUFW32-NEXT: renamable $sgpr7 = COPY $sgpr7
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
@@ -1064,16 +1064,16 @@ body: |
; MUBUFW64-LABEL: name: s_add_i32__different_sgpr__fi_offset0
; MUBUFW64: liveins: $sgpr8
; MUBUFW64-NEXT: {{ $}}
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, $sgpr8, implicit-def dead $scc
; MUBUFW64-NEXT: renamable $sgpr7 = COPY $sgpr8
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7
;
; MUBUFW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0
; MUBUFW32: liveins: $sgpr8
; MUBUFW32-NEXT: {{ $}}
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, $sgpr8, implicit-def dead $scc
; MUBUFW32-NEXT: renamable $sgpr7 = COPY $sgpr8
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7
;
@@ -1110,16 +1110,16 @@ body: |
; MUBUFW64-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after
; MUBUFW64: liveins: $sgpr8
; MUBUFW64-NEXT: {{ $}}
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, $sgpr8, implicit-def dead $scc
; MUBUFW64-NEXT: renamable $sgpr7 = COPY $sgpr8
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $sgpr8
;
; MUBUFW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after
; MUBUFW32: liveins: $sgpr8
; MUBUFW32-NEXT: {{ $}}
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr7, $sgpr8, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, $sgpr8, implicit-def dead $scc
; MUBUFW32-NEXT: renamable $sgpr7 = COPY $sgpr8
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $sgpr8
;
@@ -1344,16 +1344,16 @@ body: |
; MUBUFW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset0
; MUBUFW64: liveins: $sgpr8
; MUBUFW64-NEXT: {{ $}}
- ; MUBUFW64-NEXT: renamable $sgpr8 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, $sgpr8, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr4, $sgpr8, implicit-def dead $scc
; MUBUFW64-NEXT: renamable $sgpr8 = COPY $sgpr8
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr8
;
; MUBUFW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset0
; MUBUFW32: liveins: $sgpr8
; MUBUFW32-NEXT: {{ $}}
- ; MUBUFW32-NEXT: renamable $sgpr8 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, $sgpr8, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr4, $sgpr8, implicit-def dead $scc
; MUBUFW32-NEXT: renamable $sgpr8 = COPY $sgpr8
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr8
;
@@ -1392,16 +1392,16 @@ body: |
; MUBUFW64-LABEL: name: s_add_i32__fi_offset32__identity_sgpr
; MUBUFW64: liveins: $sgpr8
; MUBUFW64-NEXT: {{ $}}
- ; MUBUFW64-NEXT: renamable $sgpr8 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, $sgpr8, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr4, $sgpr8, implicit-def dead $scc
; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc
; MUBUFW64-NEXT: SI_RETURN implicit $sgpr8
;
; MUBUFW32-LABEL: name: s_add_i32__fi_offset32__identity_sgpr
; MUBUFW32: liveins: $sgpr8
; MUBUFW32-NEXT: {{ $}}
- ; MUBUFW32-NEXT: renamable $sgpr8 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
- ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, $sgpr8, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr4, $sgpr8, implicit-def dead $scc
; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc
; MUBUFW32-NEXT: SI_RETURN implicit $sgpr8
;
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir
index 132f018548bd72..ae43c215bcaec0 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir
@@ -55,8 +55,8 @@ body: |
; GCN-LABEL: name: func_add_constant_to_fi_uniform_i32
; GCN: liveins: $sgpr30_sgpr31
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_I32 4, $sgpr4, implicit-def dead $scc
+ ; GCN-NEXT: renamable $sgpr0 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_I32 4, $sgpr0, implicit-def dead $scc
; GCN-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr4, implicit $exec
; GCN-NEXT: $m0 = S_MOV_B32 -1
; GCN-NEXT: DS_WRITE_B32 undef renamable $vgpr0, killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec
More information about the llvm-commits
mailing list