[llvm] [AMDGPU] Do not fold an immediate into instructions with frame indexes (PR #151263)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 29 17:46:10 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Changpeng Fang (changpeng)

<details>
<summary>Changes</summary>

  Do not fold an immediate into an instruction that already has a frame index operand.
A frame index could possibly turn out to be another immediate.

---

Patch is 24.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151263.diff


8 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+5-4) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll (+44-16) 
- (modified) llvm/test/CodeGen/AMDGPU/flat-scratch.ll (+22-9) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir (+2-1) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir (+2-1) 
- (modified) llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll (+2-1) 
- (modified) llvm/test/CodeGen/AMDGPU/issue130120-eliminate-frame-index.ll (+17-9) 
- (modified) llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll (+18-9) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index c2da937552240..49b1683bf1abe 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6120,10 +6120,11 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
           !Op.isIdenticalTo(*MO))
         return false;
 
-      // Do not fold a frame index into an instruction that already has a frame
-      // index. The frame index handling code doesn't handle fixing up operand
-      // constraints if there are multiple indexes.
-      if (Op.isFI() && MO->isFI())
+      // Do not fold a frame index or an immediate into an instruction that
+      // already has a frame index. The frame index handling code doesn't handle
+      // fixing up operand constraints if there are multiple indexes, and a
+      // frame index could possibly turn out to be another immediate.
+      if (Op.isFI() && (MO->isFI() || MO->isImm()))
         return false;
     }
   } else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
index a066b15f84d6b..e6a8baceee020 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
@@ -1917,8 +1917,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; GFX9-NEXT:    s_mov_b32 s0, 0
 ; GFX9-NEXT:    scratch_store_dword off, v0, s0 offset:4
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_movk_i32 s0, 0x3e80
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 15
-; GFX9-NEXT:    s_movk_i32 s0, 0x3e84
+; GFX9-NEXT:    s_add_i32 s0, s0, 4
 ; GFX9-NEXT:    scratch_store_dword off, v0, s0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
@@ -1933,7 +1934,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 13
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 15
-; GFX10-NEXT:    s_movk_i32 s0, 0x3e84
+; GFX10-NEXT:    s_movk_i32 s0, 0x3e80
+; GFX10-NEXT:    s_add_i32 s0, s0, 4
 ; GFX10-NEXT:    scratch_store_dword off, v0, off offset:4
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    scratch_store_dword off, v1, s0
@@ -1945,10 +1947,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; GFX942-LABEL: store_load_large_imm_offset_kernel:
 ; GFX942:       ; %bb.0: ; %bb
 ; GFX942-NEXT:    v_mov_b32_e32 v0, 13
+; GFX942-NEXT:    s_movk_i32 s0, 0x3e80
 ; GFX942-NEXT:    scratch_store_dword off, v0, off offset:4 sc0 sc1
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    v_mov_b32_e32 v0, 15
-; GFX942-NEXT:    s_movk_i32 s0, 0x3e84
+; GFX942-NEXT:    s_add_i32 s0, s0, 4
 ; GFX942-NEXT:    scratch_store_dword off, v0, s0 sc0 sc1
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
@@ -1958,7 +1961,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; GFX11-LABEL: store_load_large_imm_offset_kernel:
 ; GFX11:       ; %bb.0: ; %bb
 ; GFX11-NEXT:    v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
-; GFX11-NEXT:    s_movk_i32 s0, 0x3e84
+; GFX11-NEXT:    s_movk_i32 s0, 0x3e80
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    s_add_i32 s0, s0, 4
 ; GFX11-NEXT:    scratch_store_b32 off, v0, off offset:4 dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    scratch_store_b32 off, v1, s0 dlc
@@ -1986,8 +1991,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; UNALIGNED_GFX9-NEXT:    s_mov_b32 s0, 0
 ; UNALIGNED_GFX9-NEXT:    scratch_store_dword off, v0, s0 offset:4
 ; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
+; UNALIGNED_GFX9-NEXT:    s_movk_i32 s0, 0x3e80
 ; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v0, 15
-; UNALIGNED_GFX9-NEXT:    s_movk_i32 s0, 0x3e84
+; UNALIGNED_GFX9-NEXT:    s_add_i32 s0, s0, 4
 ; UNALIGNED_GFX9-NEXT:    scratch_store_dword off, v0, s0
 ; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
@@ -2002,7 +2008,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; UNALIGNED_GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
 ; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v0, 13
 ; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v1, 15
-; UNALIGNED_GFX10-NEXT:    s_movk_i32 s0, 0x3e84
+; UNALIGNED_GFX10-NEXT:    s_movk_i32 s0, 0x3e80
+; UNALIGNED_GFX10-NEXT:    s_add_i32 s0, s0, 4
 ; UNALIGNED_GFX10-NEXT:    scratch_store_dword off, v0, off offset:4
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; UNALIGNED_GFX10-NEXT:    scratch_store_dword off, v1, s0
@@ -2014,10 +2021,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_kernel:
 ; UNALIGNED_GFX942:       ; %bb.0: ; %bb
 ; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v0, 13
+; UNALIGNED_GFX942-NEXT:    s_movk_i32 s0, 0x3e80
 ; UNALIGNED_GFX942-NEXT:    scratch_store_dword off, v0, off offset:4 sc0 sc1
 ; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v0, 15
-; UNALIGNED_GFX942-NEXT:    s_movk_i32 s0, 0x3e84
+; UNALIGNED_GFX942-NEXT:    s_add_i32 s0, s0, 4
 ; UNALIGNED_GFX942-NEXT:    scratch_store_dword off, v0, s0 sc0 sc1
 ; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
@@ -2027,7 +2035,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_kernel:
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
 ; UNALIGNED_GFX11-NEXT:    v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
-; UNALIGNED_GFX11-NEXT:    s_movk_i32 s0, 0x3e84
+; UNALIGNED_GFX11-NEXT:    s_movk_i32 s0, 0x3e80
+; UNALIGNED_GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; UNALIGNED_GFX11-NEXT:    s_add_i32 s0, s0, 4
 ; UNALIGNED_GFX11-NEXT:    scratch_store_b32 off, v0, off offset:4 dlc
 ; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; UNALIGNED_GFX11-NEXT:    scratch_store_b32 off, v1, s0 dlc
@@ -2061,11 +2071,13 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX9-LABEL: store_load_large_imm_offset_foo:
 ; GFX9:       ; %bb.0: ; %bb
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_movk_i32 s0, 0x3e80
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 13
+; GFX9-NEXT:    s_add_i32 s1, s32, s0
 ; GFX9-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 15
-; GFX9-NEXT:    s_add_i32 s0, s32, 0x3e84
+; GFX9-NEXT:    s_add_i32 s0, s1, 4
 ; GFX9-NEXT:    scratch_store_dword off, v0, s0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
@@ -2076,8 +2088,10 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX10:       ; %bb.0: ; %bb
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 13
+; GFX10-NEXT:    s_movk_i32 s0, 0x3e80
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 15
-; GFX10-NEXT:    s_add_i32 s0, s32, 0x3e84
+; GFX10-NEXT:    s_add_i32 s1, s32, s0
+; GFX10-NEXT:    s_add_i32 s0, s1, 4
 ; GFX10-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    scratch_store_dword off, v1, s0
@@ -2089,11 +2103,13 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX942-LABEL: store_load_large_imm_offset_foo:
 ; GFX942:       ; %bb.0: ; %bb
 ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT:    s_movk_i32 s0, 0x3e80
 ; GFX942-NEXT:    v_mov_b32_e32 v0, 13
+; GFX942-NEXT:    s_add_i32 s1, s32, s0
 ; GFX942-NEXT:    scratch_store_dword off, v0, s32 offset:4 sc0 sc1
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    v_mov_b32_e32 v0, 15
-; GFX942-NEXT:    s_add_i32 s0, s32, 0x3e84
+; GFX942-NEXT:    s_add_i32 s0, s1, 4
 ; GFX942-NEXT:    scratch_store_dword off, v0, s0 sc0 sc1
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
@@ -2104,7 +2120,10 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX11:       ; %bb.0: ; %bb
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
-; GFX11-NEXT:    s_add_i32 s0, s32, 0x3e84
+; GFX11-NEXT:    s_movk_i32 s0, 0x3e80
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    s_add_i32 s1, s32, s0
+; GFX11-NEXT:    s_add_i32 s0, s1, 4
 ; GFX11-NEXT:    scratch_store_b32 off, v0, s32 offset:4 dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    scratch_store_b32 off, v1, s0 dlc
@@ -2133,11 +2152,13 @@ define void @store_load_large_imm_offset_foo() {
 ; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_foo:
 ; UNALIGNED_GFX9:       ; %bb.0: ; %bb
 ; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; UNALIGNED_GFX9-NEXT:    s_movk_i32 s0, 0x3e80
 ; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v0, 13
+; UNALIGNED_GFX9-NEXT:    s_add_i32 s1, s32, s0
 ; UNALIGNED_GFX9-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX9-NEXT:    v_mov_b32_e32 v0, 15
-; UNALIGNED_GFX9-NEXT:    s_add_i32 s0, s32, 0x3e84
+; UNALIGNED_GFX9-NEXT:    s_add_i32 s0, s1, 4
 ; UNALIGNED_GFX9-NEXT:    scratch_store_dword off, v0, s0
 ; UNALIGNED_GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX9-NEXT:    scratch_load_dword v0, off, s0 glc
@@ -2148,8 +2169,10 @@ define void @store_load_large_imm_offset_foo() {
 ; UNALIGNED_GFX10:       ; %bb.0: ; %bb
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v0, 13
+; UNALIGNED_GFX10-NEXT:    s_movk_i32 s0, 0x3e80
 ; UNALIGNED_GFX10-NEXT:    v_mov_b32_e32 v1, 15
-; UNALIGNED_GFX10-NEXT:    s_add_i32 s0, s32, 0x3e84
+; UNALIGNED_GFX10-NEXT:    s_add_i32 s1, s32, s0
+; UNALIGNED_GFX10-NEXT:    s_add_i32 s0, s1, 4
 ; UNALIGNED_GFX10-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; UNALIGNED_GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; UNALIGNED_GFX10-NEXT:    scratch_store_dword off, v1, s0
@@ -2161,11 +2184,13 @@ define void @store_load_large_imm_offset_foo() {
 ; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_foo:
 ; UNALIGNED_GFX942:       ; %bb.0: ; %bb
 ; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; UNALIGNED_GFX942-NEXT:    s_movk_i32 s0, 0x3e80
 ; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v0, 13
+; UNALIGNED_GFX942-NEXT:    s_add_i32 s1, s32, s0
 ; UNALIGNED_GFX942-NEXT:    scratch_store_dword off, v0, s32 offset:4 sc0 sc1
 ; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX942-NEXT:    v_mov_b32_e32 v0, 15
-; UNALIGNED_GFX942-NEXT:    s_add_i32 s0, s32, 0x3e84
+; UNALIGNED_GFX942-NEXT:    s_add_i32 s0, s1, 4
 ; UNALIGNED_GFX942-NEXT:    scratch_store_dword off, v0, s0 sc0 sc1
 ; UNALIGNED_GFX942-NEXT:    s_waitcnt vmcnt(0)
 ; UNALIGNED_GFX942-NEXT:    scratch_load_dword v0, off, s0 sc0 sc1
@@ -2176,7 +2201,10 @@ define void @store_load_large_imm_offset_foo() {
 ; UNALIGNED_GFX11:       ; %bb.0: ; %bb
 ; UNALIGNED_GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; UNALIGNED_GFX11-NEXT:    v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
-; UNALIGNED_GFX11-NEXT:    s_add_i32 s0, s32, 0x3e84
+; UNALIGNED_GFX11-NEXT:    s_movk_i32 s0, 0x3e80
+; UNALIGNED_GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; UNALIGNED_GFX11-NEXT:    s_add_i32 s1, s32, s0
+; UNALIGNED_GFX11-NEXT:    s_add_i32 s0, s1, 4
 ; UNALIGNED_GFX11-NEXT:    scratch_store_b32 off, v0, s32 offset:4 dlc
 ; UNALIGNED_GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; UNALIGNED_GFX11-NEXT:    scratch_store_b32 off, v1, s0 dlc
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index b25d9b245f5f6..fc8883924dfbc 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -3621,7 +3621,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; GFX9-NEXT:    s_mov_b32 s0, 0
 ; GFX9-NEXT:    scratch_store_dword off, v0, s0 offset:4
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_movk_i32 s0, 0x3004
+; GFX9-NEXT:    s_movk_i32 s0, 0x3000
+; GFX9-NEXT:    s_add_i32 s0, s0, 4
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 15
 ; GFX9-NEXT:    scratch_store_dword off, v0, s0 offset:3712
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
@@ -3637,7 +3638,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 13
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 15
-; GFX10-NEXT:    s_movk_i32 s0, 0x3804
+; GFX10-NEXT:    s_movk_i32 s0, 0x3800
+; GFX10-NEXT:    s_add_i32 s0, s0, 4
 ; GFX10-NEXT:    scratch_store_dword off, v0, off offset:4
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    scratch_store_dword off, v1, s0 offset:1664
@@ -3682,7 +3684,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
 ; GFX9-PAL-NEXT:    scratch_store_dword off, v0, s0 offset:4
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-PAL-NEXT:    s_movk_i32 s0, 0x3004
+; GFX9-PAL-NEXT:    s_movk_i32 s0, 0x3000
+; GFX9-PAL-NEXT:    s_add_i32 s0, s0, 4
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v0, 15
 ; GFX9-PAL-NEXT:    scratch_store_dword off, v0, s0 offset:3712
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
@@ -3716,8 +3719,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v0, 13
 ; GFX1010-PAL-NEXT:    v_mov_b32_e32 v1, 15
+; GFX1010-PAL-NEXT:    s_movk_i32 s0, 0x3800
 ; GFX1010-PAL-NEXT:    s_mov_b32 s1, 0
-; GFX1010-PAL-NEXT:    s_movk_i32 s0, 0x3804
+; GFX1010-PAL-NEXT:    s_add_i32 s0, s0, 4
 ; GFX1010-PAL-NEXT:    scratch_store_dword off, v0, s1 offset:4
 ; GFX1010-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX1010-PAL-NEXT:    scratch_store_dword off, v1, s0 offset:1664
@@ -3739,7 +3743,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
 ; GFX1030-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
 ; GFX1030-PAL-NEXT:    v_mov_b32_e32 v0, 13
 ; GFX1030-PAL-NEXT:    v_mov_b32_e32 v1, 15
-; GFX1030-PAL-NEXT:    s_movk_i32 s0, 0x3804
+; GFX1030-PAL-NEXT:    s_movk_i32 s0, 0x3800
+; GFX1030-PAL-NEXT:    s_add_i32 s0, s0, 4
 ; GFX1030-PAL-NEXT:    scratch_store_dword off, v0, off offset:4
 ; GFX1030-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX1030-PAL-NEXT:    scratch_store_dword off, v1, s0 offset:1664
@@ -3785,10 +3790,12 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX9-LABEL: store_load_large_imm_offset_foo:
 ; GFX9:       ; %bb.0: ; %bb
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_movk_i32 s0, 0x3000
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 13
+; GFX9-NEXT:    s_add_i32 s1, s32, s0
 ; GFX9-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_add_i32 s0, s32, 0x3004
+; GFX9-NEXT:    s_add_i32 s0, s1, 4
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 15
 ; GFX9-NEXT:    scratch_store_dword off, v0, s0 offset:3712
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
@@ -3800,8 +3807,10 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX10:       ; %bb.0: ; %bb
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 13
+; GFX10-NEXT:    s_movk_i32 s0, 0x3800
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 15
-; GFX10-NEXT:    s_add_i32 s0, s32, 0x3804
+; GFX10-NEXT:    s_add_i32 s1, s32, s0
+; GFX10-NEXT:    s_add_i32 s0, s1, 4
 ; GFX10-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    scratch_store_dword off, v1, s0 offset:1664
@@ -3843,10 +3852,12 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX9-PAL-LABEL: store_load_large_imm_offset_foo:
 ; GFX9-PAL:       ; %bb.0: ; %bb
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-PAL-NEXT:    s_movk_i32 s0, 0x3000
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v0, 13
+; GFX9-PAL-NEXT:    s_add_i32 s1, s32, s0
 ; GFX9-PAL-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-PAL-NEXT:    s_add_i32 s0, s32, 0x3004
+; GFX9-PAL-NEXT:    s_add_i32 s0, s1, 4
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v0, 15
 ; GFX9-PAL-NEXT:    scratch_store_dword off, v0, s0 offset:3712
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
@@ -3872,8 +3883,10 @@ define void @store_load_large_imm_offset_foo() {
 ; GFX10-PAL:       ; %bb.0: ; %bb
 ; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-PAL-NEXT:    v_mov_b32_e32 v0, 13
+; GFX10-PAL-NEXT:    s_movk_i32 s0, 0x3800
 ; GFX10-PAL-NEXT:    v_mov_b32_e32 v1, 15
-; GFX10-PAL-NEXT:    s_add_i32 s0, s32, 0x3804
+; GFX10-PAL-NEXT:    s_add_i32 s1, s32, s0
+; GFX10-PAL-NEXT:    s_add_i32 s0, s1, 4
 ; GFX10-PAL-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-PAL-NEXT:    scratch_store_dword off, v1, s0 offset:1664
diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir
index 7fad2f466bc9f..a88b1ecc40cc9 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir
@@ -75,7 +75,8 @@ stack:
 body:             |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__s_add_i32__fi_materializedconst_0
-    ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, 256, implicit-def $scc
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 256
+    ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[S_MOV_B32_]], implicit-def $scc
     ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
     ; CHECK-NEXT: SI_RETURN implicit $sgpr4
     %0:sreg_32 = S_MOV_B32 %stack.0
diff --git a/llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir b/llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir
index cc4314263bcba..2f2d727ee2c59 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir
@@ -46,7 +46,8 @@ body: |
     %2:sreg_32 = S_LSHL2_ADD_U32 %0, %1, implicit-def $scc
 ...
 # GCN-LABEL: name: test_frameindex{{$}}
-# GCN: %1:sreg_32 = S_ADD_I32 %stack.0, 70
+# GCN:      [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 70
+# GCN-NEXT: %1:sreg_32 = S_ADD_I32 %stack.0, [[S_MOV_B32_]]
 ---
 name: test_frameindex
 tracksRegLiveness: true
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
index 15cda622b902d..f2fe61f5376e4 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -360,7 +360,8 @@ entry:
 ; s_add_i32.
 
 ; GCN-LABEL: {{^}}fi_sop2_s_add_u32_literal_error:
-; GCN: s_add_u32 [[ADD_LO:s[0-9]+]], 0, 0x2010
+; GCN: s_movk_i32 [[S_MOVK_I32_:s[0-9]+]], 0x1000
+; GCN: s_add_u32 [[ADD_LO:s[0-9]+]], 0x1010, [[S_MOVK_I32_]]
 ; GCN: s_addc_u32 [[ADD_HI:s[0-9]+]], s{{[0-9]+}}, 0
 define amdgpu_kernel void @fi_sop2_s_add_u32_literal_error() #0 {
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/issue130120-eliminate-frame-index.ll b/llvm/test/CodeGen/AMDGPU/issue130120-eliminate-frame-index.ll
index 1c298014e33e7..300124848c1aa 100644
--- a/llvm/test/CodeGen/AMDGPU/issue130120-eliminate-frame-index.ll
+++ b/llvm/test/CodeGen/AMDGPU/issue130120-eliminate-frame-index.ll
@@ -6,16 +6,24 @@ define amdgpu_gfx [13 x i32] @issue130120() {
 ; CHECK:       ; %bb.0: ; %bb
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; CHECK-NEXT:    s_add_i32 s0, s32, 0xf0
-; CHECK-NEXT:    s_add_i32 s1, s32, 0xf4
-; CHECK-NEXT:    s_add_i32 s2, s32, 0xf8
-; CHECK-NEXT:    s_add_i32 s3, s32, 0xfc
+; CHECK-NEXT:    s_movk_i32 s1, 0xf4
+; CHECK-NEXT:    s_movk_i32 s2, 0xf8
+; CHECK-NEXT:    s_movk_i32 s3, 0xfc
+; CHECK-NEXT:    s_movk_i32 s34, 0x100
 ; CHECK-NEXT:    v_mov_b32_e32 v1, v0
-; CHECK-NEXT:    s_add_i32 s34, s32, 0x100
-; CHECK-NEXT:    s_add_i32 s35, s32, 0x104
-; CHECK-NEXT:    s_add_i32 s36, s32, 0x108
-; CHECK-NEXT:    s_add_i32 s37, s32, 0x110
-; CHECK-NEXT:    s_add_i32 s38, s32, 0x120
+; CHECK-NEXT:    s_movk_i3...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/151263


More information about the llvm-commits mailing list