[llvm] AMDGPU/GlobalISel: Permit mapping G_FRAME_INDEX to sgprs (PR #101325)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 31 05:34:57 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
eliminateFrameIndex should now properly handle materializing
frame indices in SGPRs, so treat G_FRAME_INDEX like the other
constant operand types and map its result to the SGPR bank.
On average this currently produces worse code; as follow-up work we
need to detect VGPR uses and improve SGPR->VGPR frame index folds.
---
Patch is 48.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101325.diff
8 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+1-7)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll (+4-2)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll (+9-5)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll (+68-63)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll (+7-7)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frame-index.mir (+33-10)
- (modified) llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll (+25-11)
- (modified) llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll (+119-96)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 8da8c94b4d665..9a6ba5ac68084 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4060,6 +4060,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_FCONSTANT:
case AMDGPU::G_CONSTANT:
case AMDGPU::G_GLOBAL_VALUE:
+ case AMDGPU::G_FRAME_INDEX:
case AMDGPU::G_BLOCK_ADDR:
case AMDGPU::G_READSTEADYCOUNTER:
case AMDGPU::G_READCYCLECOUNTER: {
@@ -4067,13 +4068,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
}
- case AMDGPU::G_FRAME_INDEX: {
- // TODO: This should be the same as other constants, but eliminateFrameIndex
- // currently assumes VALU uses.
- unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
- break;
- }
case AMDGPU::G_DYN_STACKALLOC: {
// Result is always uniform, and a wave reduction is needed for the source.
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll
index 48916d8d9b2c5..84378bcb70684 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll
@@ -10,9 +10,11 @@ define amdgpu_kernel void @stack_write_fi() {
; CHECK-NEXT: s_add_u32 s0, s0, s15
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_mov_b32 s5, 0
+; CHECK-NEXT: s_mov_b32 s6, 0
; CHECK-NEXT: s_mov_b32 s4, 0
-; CHECK-NEXT: v_mov_b32_e32 v0, s5
-; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; CHECK-NEXT: v_mov_b32_e32 v0, s6
+; CHECK-NEXT: v_mov_b32_e32 v1, s5
+; CHECK-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v0, s4
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll
index b4b95fdab4ab2..4fdb4082346af 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll
@@ -10,11 +10,13 @@ define amdgpu_ps void @amdgpu_ps() {
; MESA-LABEL: amdgpu_ps:
; MESA: ; %bb.0:
; MESA-NEXT: s_add_u32 flat_scratch_lo, s2, s4
-; MESA-NEXT: s_mov_b64 s[0:1], src_private_base
; MESA-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
-; MESA-NEXT: v_mov_b32_e32 v0, 0
-; MESA-NEXT: v_mov_b32_e32 v1, s1
+; MESA-NEXT: s_mov_b32 s0, 0
+; MESA-NEXT: s_mov_b64 s[2:3], src_private_base
+; MESA-NEXT: s_mov_b32 s1, s3
+; MESA-NEXT: v_mov_b32_e32 v0, s0
; MESA-NEXT: v_mov_b32_e32 v2, 0
+; MESA-NEXT: v_mov_b32_e32 v1, s1
; MESA-NEXT: flat_store_dword v[0:1], v2
; MESA-NEXT: s_waitcnt vmcnt(0)
; MESA-NEXT: s_endpgm
@@ -24,13 +26,15 @@ define amdgpu_ps void @amdgpu_ps() {
; PAL-NEXT: s_getpc_b64 s[2:3]
; PAL-NEXT: s_mov_b32 s2, s0
; PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
-; PAL-NEXT: v_mov_b32_e32 v0, 0
; PAL-NEXT: v_mov_b32_e32 v2, 0
; PAL-NEXT: s_waitcnt lgkmcnt(0)
; PAL-NEXT: s_and_b32 s3, s3, 0xffff
; PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s0
-; PAL-NEXT: s_mov_b64 s[0:1], src_private_base
; PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
+; PAL-NEXT: s_mov_b32 s0, 0
+; PAL-NEXT: s_mov_b64 s[2:3], src_private_base
+; PAL-NEXT: s_mov_b32 s1, s3
+; PAL-NEXT: v_mov_b32_e32 v0, s0
; PAL-NEXT: v_mov_b32_e32 v1, s1
; PAL-NEXT: flat_store_dword v[0:1], v2
; PAL-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
index a5e4151bf3695..f4fd803c8dda8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
@@ -55,41 +55,40 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX940-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, s0
-; GFX940-NEXT: scratch_load_dword v0, v0, off sc0 sc1
+; GFX940-NEXT: s_add_i32 s0, s0, 0
+; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_endpgm
;
; GFX11-LABEL: store_load_sindex_kernel:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v0, 15
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_and_b32 s1, s0, 15
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-NEXT: s_lshl_b32 s1, s1, 2
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: s_add_i32 s0, s0, 0
+; GFX11-NEXT: s_add_i32 s1, s1, 0
; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: scratch_load_b32 v0, v1, off glc dlc
+; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: store_load_sindex_kernel:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: s_load_b32 s0, s[2:3], 0x24
-; GFX12-NEXT: v_mov_b32_e32 v1, 15
+; GFX12-NEXT: v_mov_b32_e32 v0, 15
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_lshl_b32 s1, s0, 2
-; GFX12-NEXT: s_and_b32 s0, s0, 15
-; GFX12-NEXT: v_mov_b32_e32 v0, s1
+; GFX12-NEXT: s_and_b32 s1, s0, 15
; GFX12-NEXT: s_lshl_b32 s0, s0, 2
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT: v_mov_b32_e32 v2, s0
-; GFX12-NEXT: scratch_store_b32 v0, v1, off scope:SCOPE_SYS
+; GFX12-NEXT: s_lshl_b32 s1, s1, 2
+; GFX12-NEXT: s_add_co_i32 s0, s0, 0
+; GFX12-NEXT: s_add_co_i32 s1, s1, 0
+; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
-; GFX12-NEXT: scratch_load_b32 v0, v2, off scope:SCOPE_SYS
+; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_endpgm
bb:
@@ -378,44 +377,44 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
; GFX940-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, s0
-; GFX940-NEXT: scratch_load_dword v0, v0, off offset:256 sc0 sc1
+; GFX940-NEXT: s_addk_i32 s0, 0x100
+; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_endpgm
;
; GFX11-LABEL: store_load_sindex_small_offset_kernel:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x24
-; GFX11-NEXT: scratch_load_b32 v2, off, off glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: scratch_load_b32 v0, off, off glc dlc
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_mov_b32_e32 v0, 15
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_and_b32 s1, s0, 15
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-NEXT: s_lshl_b32 s1, s1, 2
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: s_addk_i32 s0, 0x100
+; GFX11-NEXT: s_addk_i32 s1, 0x100
; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: scratch_load_b32 v0, v1, off offset:256 glc dlc
+; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: store_load_sindex_small_offset_kernel:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: s_load_b32 s0, s[2:3], 0x24
-; GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
+; GFX12-NEXT: scratch_load_b32 v0, off, off scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: v_mov_b32_e32 v1, 15
+; GFX12-NEXT: v_mov_b32_e32 v0, 15
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_lshl_b32 s1, s0, 2
-; GFX12-NEXT: s_and_b32 s0, s0, 15
-; GFX12-NEXT: v_mov_b32_e32 v0, s1
+; GFX12-NEXT: s_and_b32 s1, s0, 15
; GFX12-NEXT: s_lshl_b32 s0, s0, 2
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT: v_mov_b32_e32 v2, s0
-; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:256 scope:SCOPE_SYS
+; GFX12-NEXT: s_lshl_b32 s1, s1, 2
+; GFX12-NEXT: s_addk_co_i32 s0, 0x100
+; GFX12-NEXT: s_addk_co_i32 s1, 0x100
+; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
-; GFX12-NEXT: scratch_load_b32 v0, v2, off offset:256 scope:SCOPE_SYS
+; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_endpgm
bb:
@@ -692,46 +691,44 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
; GFX940-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, s0
-; GFX940-NEXT: s_movk_i32 s0, 0x4004
-; GFX940-NEXT: scratch_load_dword v0, v0, s0 sc0 sc1
+; GFX940-NEXT: s_addk_i32 s0, 0x4004
+; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_endpgm
;
; GFX11-LABEL: store_load_sindex_large_offset_kernel:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x24
-; GFX11-NEXT: scratch_load_b32 v2, off, off offset:4 glc dlc
-; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: scratch_load_b32 v0, off, off offset:4 glc dlc
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_mov_b32_e32 v0, 15
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_and_b32 s1, s0, 15
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-NEXT: s_lshl_b32 s1, s1, 2
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: s_addk_i32 s0, 0x4004
+; GFX11-NEXT: s_addk_i32 s1, 0x4004
; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: s_movk_i32 s0, 0x4004
-; GFX11-NEXT: scratch_load_b32 v0, v1, s0 glc dlc
+; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: store_load_sindex_large_offset_kernel:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: s_load_b32 s0, s[2:3], 0x24
-; GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
+; GFX12-NEXT: scratch_load_b32 v0, off, off scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: v_mov_b32_e32 v1, 15
+; GFX12-NEXT: v_mov_b32_e32 v0, 15
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_lshl_b32 s1, s0, 2
-; GFX12-NEXT: s_and_b32 s0, s0, 15
-; GFX12-NEXT: v_mov_b32_e32 v0, s1
+; GFX12-NEXT: s_and_b32 s1, s0, 15
; GFX12-NEXT: s_lshl_b32 s0, s0, 2
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT: v_mov_b32_e32 v2, s0
-; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:16384 scope:SCOPE_SYS
+; GFX12-NEXT: s_lshl_b32 s1, s1, 2
+; GFX12-NEXT: s_addk_co_i32 s0, 0x4000
+; GFX12-NEXT: s_addk_co_i32 s1, 0x4000
+; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
-; GFX12-NEXT: scratch_load_b32 v0, v2, off offset:16384 scope:SCOPE_SYS
+; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_endpgm
bb:
@@ -995,25 +992,28 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
; GFX940-LABEL: store_load_large_imm_offset_kernel:
; GFX940: ; %bb.0: ; %bb
; GFX940-NEXT: v_mov_b32_e32 v0, 13
+; GFX940-NEXT: s_movk_i32 s0, 0x3e80
; GFX940-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, 0x3e80
-; GFX940-NEXT: v_mov_b32_e32 v1, 15
-; GFX940-NEXT: scratch_store_dword v0, v1, off offset:4 sc0 sc1
+; GFX940-NEXT: v_mov_b32_e32 v0, 15
+; GFX940-NEXT: s_add_i32 s0, s0, 4
+; GFX940-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: scratch_load_dword v0, v0, off offset:4 sc0 sc1
+; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_endpgm
;
; GFX11-LABEL: store_load_large_imm_offset_kernel:
; GFX11: ; %bb.0: ; %bb
-; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 0x3e80
-; GFX11-NEXT: v_mov_b32_e32 v2, 15
+; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
+; GFX11-NEXT: s_movk_i32 s0, 0x3e80
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: s_add_i32 s0, s0, 4
; GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: scratch_store_b32 v1, v2, off offset:4 dlc
+; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: scratch_load_b32 v0, v1, off offset:4 glc dlc
+; GFX11-NEXT: scratch_load_b32 v0, off, s0 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_endpgm
;
@@ -1075,26 +1075,31 @@ define void @store_load_large_imm_offset_foo() {
; GFX940: ; %bb.0: ; %bb
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v0, 13
+; GFX940-NEXT: s_movk_i32 s0, 0x3e80
+; GFX940-NEXT: s_add_i32 s1, s32, 4
; GFX940-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v0, 0x3e80
-; GFX940-NEXT: v_mov_b32_e32 v1, 15
-; GFX940-NEXT: scratch_store_dword v0, v1, s32 offset:4 sc0 sc1
+; GFX940-NEXT: v_mov_b32_e32 v0, 15
+; GFX940-NEXT: s_add_i32 s0, s0, s1
+; GFX940-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: scratch_load_dword v0, v0, s32 offset:4 sc0 sc1
+; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: store_load_large_imm_offset_foo:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 0x3e80
-; GFX11-NEXT: v_mov_b32_e32 v2, 15
+; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
+; GFX11-NEXT: s_movk_i32 s0, 0x3e80
+; GFX11-NEXT: s_add_i32 s1, s32, 4
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: s_add_i32 s0, s0, s1
; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: scratch_store_b32 v1, v2, s32 offset:4 dlc
+; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: scratch_load_b32 v0, v1, s32 offset:4 glc dlc
+; GFX11-NEXT: scratch_load_b32 v0, off, s0 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll
index db944b98a3013..4fcde0f2fc7cf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll
@@ -11,12 +11,11 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(ptr addrspace(1) %out.ptr, ptr
; GCN-NEXT: s_load_dwordx2 s[24:25], s[6:7], 0x10
; GCN-NEXT: s_add_u32 s0, s0, s13
; GCN-NEXT: s_addc_u32 s1, s1, 0
-; GCN-NEXT: v_mov_b32_e32 v16, 0
+; GCN-NEXT: v_mov_b32_e32 v64, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_load_dwordx16 s[36:51], s[22:23], 0x0
; GCN-NEXT: s_load_dwordx16 s[52:67], s[22:23], 0x40
; GCN-NEXT: s_load_dwordx16 s[4:19], s[22:23], 0x80
-; GCN-NEXT: v_mov_b32_e32 v64, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s36
; GCN-NEXT: v_mov_b32_e32 v1, s37
@@ -143,16 +142,17 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(ptr addrspace(1) %out.ptr, ptr
; GCN-NEXT: v_mov_b32_e32 v0, s48
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:240
; GCN-NEXT: v_mov_b32_e32 v0, s49
+; GCN-NEXT: s_and_b32 s4, s25, 63
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:244
; GCN-NEXT: v_mov_b32_e32 v0, s50
-; GCN-NEXT: s_and_b32 s4, s25, 63
+; GCN-NEXT: s_lshl_b32 s4, s4, 2
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:248
; GCN-NEXT: v_mov_b32_e32 v0, s51
-; GCN-NEXT: s_lshl_b32 s4, s4, 2
+; GCN-NEXT: s_add_u32 s4, 0, s4
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:252
-; GCN-NEXT: v_add_u32_e32 v0, s4, v16
-; GCN-NEXT: v_mov_b32_e32 v1, s24
-; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; GCN-NEXT: v_mov_b32_e32 v0, s24
+; GCN-NEXT: v_mov_b32_e32 v1, s4
+; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0
; GCN-NEXT: s_nop 0
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frame-index.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frame-index.mir
index 14f69c301ec38..76994c5cccf5f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frame-index.mir
@@ -2,22 +2,45 @@
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
---- |
- target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
- define void @test_frame_index_p5() {
- %ptr0 = alloca i32, addrspace(5)
- ret void
- }
-...
---
name: test_frame_index_p5
legalized: true
stack:
- - { id: 0, name: ptr0, offset: 0, size: 4, alignment: 4 }
+ - { id: 0, offset: 0, size: 4, alignment: 4 }
body: |
bb.0:
; CHECK-LABEL: name: test_frame_index_p5
- ; CHECK: [[FRAME_INDEX:%[0-9]+]]:vgpr(p5) = G_FRAME_INDEX %stack.0.ptr0
- %0:_(p5) = G_FRAME_INDEX %stack.0.ptr0
+ ; CHECK: [[FRAME_INDEX:%[0-9]+]]:sgpr(p5) = G_FRAME_INDEX %stack.0
+ %0:_(p5) = G_FRAME_INDEX %stack.0
+
+...
+
+---
+name: test_frame_index_p5_sgpr_use
+legalized: true
+stack:
+ - { id: 0, offset: 0, size: 4, alignment: 4 }
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: test_frame_index_p5_sgpr_use
+ ; CHECK: [[FRAME_INDEX:%[0-9]+]]:sgpr(p5) = G_FRAME_INDEX %stack.0
+ ; CHECK-NEXT: $sgpr0 = COPY [[FRAME_INDEX]](p5)
+ %0:_(p5) = G_FRAME_INDEX %stack.0
+ $sgpr0 = COPY %0
+
+...
+
+---
+name: test_frame_index_p5_vgpr_use
+legalized: true
+stack:
+ - { id: 0, offset: 0, size: 4, alignment: 4 }
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: test_frame_index_p5_vgpr_use
+ ; CHECK: [[FRAME_INDEX:%[0-9]+]]:sgpr(p5) = G_FRAME_INDEX %stack.0
+ ; CHECK-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5)
+ %0:_(p5) = G_FRAME_INDEX %stack.0
+ $vgpr0 = COPY %0
...
diff --git a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
index e2b4865410db8..3216e71e6221a 100644
--- a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
+++ b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
@@ -96,16 +96,29 @@ define void @private_alloca_to_flat(ptr %ptr) {
; OPT-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
; OPT-NEXT: ret void
;
-; ASM-LABEL: private_alloca_to_flat:
-; ASM: ; %bb.0:
-; ASM-NEXT: s_waitcnt vm...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/101325
More information about the llvm-commits mailing list