[llvm] AMDGPU: Add testcase for materializing sgpr frame indexes (PR #101306)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 31 01:52:23 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

<details>
<summary>Changes</summary>

This adds some IR tests for commit 57d10b4fc9142d12fbdec578a0cc6f78deb67ef4.
The tests rely on some lucky MIR placement to exercise the scc input, but I
haven't found a better way to do it. Also, scc handling in inline asm
is extremely buggy.

---

Patch is 186.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101306.diff


2 Files Affected:

- (added) llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll (+1836) 
- (added) llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll (+2323) 


``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll
new file mode 100644
index 0000000000000..94d1eca05ed0e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll
@@ -0,0 +1,1836 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck -check-prefix=GFX10_1 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -mattr=+cumode < %s | FileCheck -check-prefix=GFX10_3 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck -check-prefix=GFX12 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX940 %s
+
+; We aren't pressuring the SGPRs, so this can use the add with carry out pre-gfx9.
+define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
+; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
+; GFX10_1:       ; %bb.0:
+; GFX10_1-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_1-NEXT:    s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT:    s_add_i32 s5, s32, 0x80880
+; GFX10_1-NEXT:    buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_1-NEXT:    s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT:    s_mov_b32 exec_lo, s4
+; GFX10_1-NEXT:    v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_1-NEXT:    v_writelane_b32 v1, s59, 0
+; GFX10_1-NEXT:    s_and_b32 s4, 0, exec_lo
+; GFX10_1-NEXT:    v_add_nc_u32_e32 v0, 64, v0
+; GFX10_1-NEXT:    ;;#ASMSTART
+; GFX10_1-NEXT:    ; use alloca0 v0
+; GFX10_1-NEXT:    ;;#ASMEND
+; GFX10_1-NEXT:    v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_1-NEXT:    v_add_nc_u32_e32 v0, 0x4040, v0
+; GFX10_1-NEXT:    v_readfirstlane_b32 s59, v0
+; GFX10_1-NEXT:    ;;#ASMSTART
+; GFX10_1-NEXT:    ; use s59, scc
+; GFX10_1-NEXT:    ;;#ASMEND
+; GFX10_1-NEXT:    v_readlane_b32 s59, v1, 0
+; GFX10_1-NEXT:    s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT:    s_add_i32 s5, s32, 0x80880
+; GFX10_1-NEXT:    buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_1-NEXT:    s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT:    s_mov_b32 exec_lo, s4
+; GFX10_1-NEXT:    s_waitcnt vmcnt(0)
+; GFX10_1-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
+; GFX10_3:       ; %bb.0:
+; GFX10_3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_3-NEXT:    s_xor_saveexec_b32 s4, -1
+; GFX10_3-NEXT:    s_add_i32 s5, s32, 0x80880
+; GFX10_3-NEXT:    buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_3-NEXT:    s_mov_b32 exec_lo, s4
+; GFX10_3-NEXT:    v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_3-NEXT:    v_writelane_b32 v1, s59, 0
+; GFX10_3-NEXT:    s_and_b32 s4, 0, exec_lo
+; GFX10_3-NEXT:    v_add_nc_u32_e32 v0, 64, v0
+; GFX10_3-NEXT:    ;;#ASMSTART
+; GFX10_3-NEXT:    ; use alloca0 v0
+; GFX10_3-NEXT:    ;;#ASMEND
+; GFX10_3-NEXT:    v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_3-NEXT:    v_add_nc_u32_e32 v0, 0x4040, v0
+; GFX10_3-NEXT:    v_readfirstlane_b32 s59, v0
+; GFX10_3-NEXT:    ;;#ASMSTART
+; GFX10_3-NEXT:    ; use s59, scc
+; GFX10_3-NEXT:    ;;#ASMEND
+; GFX10_3-NEXT:    v_readlane_b32 s59, v1, 0
+; GFX10_3-NEXT:    s_xor_saveexec_b32 s4, -1
+; GFX10_3-NEXT:    s_add_i32 s5, s32, 0x80880
+; GFX10_3-NEXT:    buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_3-NEXT:    s_mov_b32 exec_lo, s4
+; GFX10_3-NEXT:    s_waitcnt vmcnt(0)
+; GFX10_3-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT:    s_add_i32 s1, s32, 0x4044
+; GFX11-NEXT:    scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill
+; GFX11-NEXT:    s_mov_b32 exec_lo, s0
+; GFX11-NEXT:    s_add_i32 s0, s32, 64
+; GFX11-NEXT:    v_writelane_b32 v1, s59, 0
+; GFX11-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-NEXT:    s_and_b32 s0, 0, exec_lo
+; GFX11-NEXT:    s_addc_u32 s0, s32, 0x4040
+; GFX11-NEXT:    ;;#ASMSTART
+; GFX11-NEXT:    ; use alloca0 v0
+; GFX11-NEXT:    ;;#ASMEND
+; GFX11-NEXT:    s_bitcmp1_b32 s0, 0
+; GFX11-NEXT:    s_bitset0_b32 s0, 0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    s_mov_b32 s59, s0
+; GFX11-NEXT:    ;;#ASMSTART
+; GFX11-NEXT:    ; use s59, scc
+; GFX11-NEXT:    ;;#ASMEND
+; GFX11-NEXT:    v_readlane_b32 s59, v1, 0
+; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT:    s_add_i32 s1, s32, 0x4044
+; GFX11-NEXT:    scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload
+; GFX11-NEXT:    s_mov_b32 exec_lo, s0
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_xor_saveexec_b32 s0, -1
+; GFX12-NEXT:    scratch_store_b32 off, v1, s32 offset:16388 ; 4-byte Folded Spill
+; GFX12-NEXT:    s_mov_b32 exec_lo, s0
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-NEXT:    s_and_b32 s0, 0, exec_lo
+; GFX12-NEXT:    v_writelane_b32 v1, s59, 0
+; GFX12-NEXT:    s_add_co_ci_u32 s0, s32, 0x4000
+; GFX12-NEXT:    v_mov_b32_e32 v0, s32
+; GFX12-NEXT:    s_bitcmp1_b32 s0, 0
+; GFX12-NEXT:    s_bitset0_b32 s0, 0
+; GFX12-NEXT:    ;;#ASMSTART
+; GFX12-NEXT:    ; use alloca0 v0
+; GFX12-NEXT:    ;;#ASMEND
+; GFX12-NEXT:    s_mov_b32 s59, s0
+; GFX12-NEXT:    ;;#ASMSTART
+; GFX12-NEXT:    ; use s59, scc
+; GFX12-NEXT:    ;;#ASMEND
+; GFX12-NEXT:    v_readlane_b32 s59, v1, 0
+; GFX12-NEXT:    s_xor_saveexec_b32 s0, -1
+; GFX12-NEXT:    scratch_load_b32 v1, off, s32 offset:16388 ; 4-byte Folded Reload
+; GFX12-NEXT:    s_mov_b32 exec_lo, s0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT:    s_add_i32 s6, s32, 0x101100
+; GFX8-NEXT:    buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX8-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX8-NEXT:    v_lshrrev_b32_e64 v0, 6, s32
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 64, v0
+; GFX8-NEXT:    v_writelane_b32 v1, s59, 0
+; GFX8-NEXT:    ;;#ASMSTART
+; GFX8-NEXT:    ; use alloca0 v0
+; GFX8-NEXT:    ;;#ASMEND
+; GFX8-NEXT:    v_lshrrev_b32_e64 v0, 6, s32
+; GFX8-NEXT:    s_movk_i32 s59, 0x4040
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s59, v0
+; GFX8-NEXT:    v_readfirstlane_b32 s59, v0
+; GFX8-NEXT:    s_and_b64 s[4:5], 0, exec
+; GFX8-NEXT:    ;;#ASMSTART
+; GFX8-NEXT:    ; use s59, scc
+; GFX8-NEXT:    ;;#ASMEND
+; GFX8-NEXT:    v_readlane_b32 s59, v1, 0
+; GFX8-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT:    s_add_i32 s6, s32, 0x101100
+; GFX8-NEXT:    buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX8-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX8-NEXT:    s_waitcnt vmcnt(0)
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT:    s_add_i32 s6, s32, 0x101100
+; GFX900-NEXT:    buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX900-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX900-NEXT:    v_lshrrev_b32_e64 v0, 6, s32
+; GFX900-NEXT:    v_add_u32_e32 v0, 64, v0
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use alloca0 v0
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    v_lshrrev_b32_e64 v0, 6, s32
+; GFX900-NEXT:    v_add_u32_e32 v0, 0x4040, v0
+; GFX900-NEXT:    v_writelane_b32 v1, s59, 0
+; GFX900-NEXT:    v_readfirstlane_b32 s59, v0
+; GFX900-NEXT:    s_and_b64 s[4:5], 0, exec
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s59, scc
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    v_readlane_b32 s59, v1, 0
+; GFX900-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT:    s_add_i32 s6, s32, 0x101100
+; GFX900-NEXT:    buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX900-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX900-NEXT:    s_waitcnt vmcnt(0)
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    s_xor_saveexec_b64 s[0:1], -1
+; GFX940-NEXT:    s_add_i32 s2, s32, 0x4044
+; GFX940-NEXT:    scratch_store_dword off, v1, s2 sc0 sc1 ; 4-byte Folded Spill
+; GFX940-NEXT:    s_mov_b64 exec, s[0:1]
+; GFX940-NEXT:    s_add_i32 s0, s32, 64
+; GFX940-NEXT:    v_mov_b32_e32 v0, s0
+; GFX940-NEXT:    s_and_b64 s[0:1], 0, exec
+; GFX940-NEXT:    s_addc_u32 s0, s32, 0x4040
+; GFX940-NEXT:    s_bitcmp1_b32 s0, 0
+; GFX940-NEXT:    s_bitset0_b32 s0, 0
+; GFX940-NEXT:    v_writelane_b32 v1, s59, 0
+; GFX940-NEXT:    s_mov_b32 s59, s0
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use alloca0 v0
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s59, scc
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    v_readlane_b32 s59, v1, 0
+; GFX940-NEXT:    s_xor_saveexec_b64 s[0:1], -1
+; GFX940-NEXT:    s_add_i32 s2, s32, 0x4044
+; GFX940-NEXT:    scratch_load_dword v1, off, s2 ; 4-byte Folded Reload
+; GFX940-NEXT:    s_mov_b64 exec, s[0:1]
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+  %alloca0 = alloca [4096 x i32], align 64, addrspace(5)
+  %alloca1 = alloca i32, align 4, addrspace(5)
+  call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
+  call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca1, i32 0)
+  ret void
+}
+
+; %alloca1 should end up materializing with s_mov_b32, and scc is
+; available.
+define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
+; GFX10_1-LABEL: scalar_mov_materializes_frame_index_dead_scc:
+; GFX10_1:       ; %bb.0:
+; GFX10_1-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_1-NEXT:    s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT:    s_add_i32 s5, s32, 0x80880
+; GFX10_1-NEXT:    buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_1-NEXT:    s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT:    s_mov_b32 exec_lo, s4
+; GFX10_1-NEXT:    v_writelane_b32 v1, s59, 0
+; GFX10_1-NEXT:    v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_1-NEXT:    s_lshr_b32 s59, s32, 5
+; GFX10_1-NEXT:    s_addk_i32 s59, 0x4040
+; GFX10_1-NEXT:    v_add_nc_u32_e32 v0, 64, v0
+; GFX10_1-NEXT:    ;;#ASMSTART
+; GFX10_1-NEXT:    ; use alloca0 v0
+; GFX10_1-NEXT:    ;;#ASMEND
+; GFX10_1-NEXT:    ;;#ASMSTART
+; GFX10_1-NEXT:    ; use s59
+; GFX10_1-NEXT:    ;;#ASMEND
+; GFX10_1-NEXT:    v_readlane_b32 s59, v1, 0
+; GFX10_1-NEXT:    s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT:    s_add_i32 s5, s32, 0x80880
+; GFX10_1-NEXT:    buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_1-NEXT:    s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT:    s_mov_b32 exec_lo, s4
+; GFX10_1-NEXT:    s_waitcnt vmcnt(0)
+; GFX10_1-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10_3-LABEL: scalar_mov_materializes_frame_index_dead_scc:
+; GFX10_3:       ; %bb.0:
+; GFX10_3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_3-NEXT:    s_xor_saveexec_b32 s4, -1
+; GFX10_3-NEXT:    s_add_i32 s5, s32, 0x80880
+; GFX10_3-NEXT:    buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_3-NEXT:    s_mov_b32 exec_lo, s4
+; GFX10_3-NEXT:    v_writelane_b32 v1, s59, 0
+; GFX10_3-NEXT:    v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_3-NEXT:    s_lshr_b32 s59, s32, 5
+; GFX10_3-NEXT:    s_addk_i32 s59, 0x4040
+; GFX10_3-NEXT:    v_add_nc_u32_e32 v0, 64, v0
+; GFX10_3-NEXT:    ;;#ASMSTART
+; GFX10_3-NEXT:    ; use alloca0 v0
+; GFX10_3-NEXT:    ;;#ASMEND
+; GFX10_3-NEXT:    ;;#ASMSTART
+; GFX10_3-NEXT:    ; use s59
+; GFX10_3-NEXT:    ;;#ASMEND
+; GFX10_3-NEXT:    v_readlane_b32 s59, v1, 0
+; GFX10_3-NEXT:    s_xor_saveexec_b32 s4, -1
+; GFX10_3-NEXT:    s_add_i32 s5, s32, 0x80880
+; GFX10_3-NEXT:    buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_3-NEXT:    s_mov_b32 exec_lo, s4
+; GFX10_3-NEXT:    s_waitcnt vmcnt(0)
+; GFX10_3-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: scalar_mov_materializes_frame_index_dead_scc:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT:    s_add_i32 s1, s32, 0x4044
+; GFX11-NEXT:    scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill
+; GFX11-NEXT:    s_mov_b32 exec_lo, s0
+; GFX11-NEXT:    v_writelane_b32 v1, s59, 0
+; GFX11-NEXT:    s_add_i32 s0, s32, 64
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-NEXT:    s_add_i32 s0, s32, 0x4040
+; GFX11-NEXT:    ;;#ASMSTART
+; GFX11-NEXT:    ; use alloca0 v0
+; GFX11-NEXT:    ;;#ASMEND
+; GFX11-NEXT:    s_mov_b32 s59, s0
+; GFX11-NEXT:    ;;#ASMSTART
+; GFX11-NEXT:    ; use s59
+; GFX11-NEXT:    ;;#ASMEND
+; GFX11-NEXT:    v_readlane_b32 s59, v1, 0
+; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT:    s_add_i32 s1, s32, 0x4044
+; GFX11-NEXT:    scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload
+; GFX11-NEXT:    s_mov_b32 exec_lo, s0
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: scalar_mov_materializes_frame_index_dead_scc:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_xor_saveexec_b32 s0, -1
+; GFX12-NEXT:    scratch_store_b32 off, v1, s32 offset:16388 ; 4-byte Folded Spill
+; GFX12-NEXT:    s_mov_b32 exec_lo, s0
+; GFX12-NEXT:    v_writelane_b32 v1, s59, 0
+; GFX12-NEXT:    s_add_co_i32 s0, s32, 0x4000
+; GFX12-NEXT:    v_mov_b32_e32 v0, s32
+; GFX12-NEXT:    s_mov_b32 s59, s0
+; GFX12-NEXT:    ;;#ASMSTART
+; GFX12-NEXT:    ; use alloca0 v0
+; GFX12-NEXT:    ;;#ASMEND
+; GFX12-NEXT:    ;;#ASMSTART
+; GFX12-NEXT:    ; use s59
+; GFX12-NEXT:    ;;#ASMEND
+; GFX12-NEXT:    v_readlane_b32 s59, v1, 0
+; GFX12-NEXT:    s_xor_saveexec_b32 s0, -1
+; GFX12-NEXT:    scratch_load_b32 v1, off, s32 offset:16388 ; 4-byte Folded Reload
+; GFX12-NEXT:    s_mov_b32 exec_lo, s0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: scalar_mov_materializes_frame_index_dead_scc:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT:    s_add_i32 s6, s32, 0x101100
+; GFX8-NEXT:    buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX8-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX8-NEXT:    v_writelane_b32 v1, s59, 0
+; GFX8-NEXT:    s_lshr_b32 s59, s32, 6
+; GFX8-NEXT:    v_lshrrev_b32_e64 v0, 6, s32
+; GFX8-NEXT:    s_addk_i32 s59, 0x4040
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 64, v0
+; GFX8-NEXT:    ;;#ASMSTART
+; GFX8-NEXT:    ; use alloca0 v0
+; GFX8-NEXT:    ;;#ASMEND
+; GFX8-NEXT:    ;;#ASMSTART
+; GFX8-NEXT:    ; use s59
+; GFX8-NEXT:    ;;#ASMEND
+; GFX8-NEXT:    v_readlane_b32 s59, v1, 0
+; GFX8-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX8-NEXT:    s_add_i32 s6, s32, 0x101100
+; GFX8-NEXT:    buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX8-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX8-NEXT:    s_waitcnt vmcnt(0)
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX900-LABEL: scalar_mov_materializes_frame_index_dead_scc:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT:    s_add_i32 s6, s32, 0x101100
+; GFX900-NEXT:    buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX900-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX900-NEXT:    v_writelane_b32 v1, s59, 0
+; GFX900-NEXT:    s_lshr_b32 s59, s32, 6
+; GFX900-NEXT:    v_lshrrev_b32_e64 v0, 6, s32
+; GFX900-NEXT:    s_addk_i32 s59, 0x4040
+; GFX900-NEXT:    v_add_u32_e32 v0, 64, v0
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use alloca0 v0
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    ;;#ASMSTART
+; GFX900-NEXT:    ; use s59
+; GFX900-NEXT:    ;;#ASMEND
+; GFX900-NEXT:    v_readlane_b32 s59, v1, 0
+; GFX900-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX900-NEXT:    s_add_i32 s6, s32, 0x101100
+; GFX900-NEXT:    buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX900-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX900-NEXT:    s_waitcnt vmcnt(0)
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX940-LABEL: scalar_mov_materializes_frame_index_dead_scc:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX940-NEXT:    s_xor_saveexec_b64 s[0:1], -1
+; GFX940-NEXT:    s_add_i32 s2, s32, 0x4044
+; GFX940-NEXT:    scratch_store_dword off, v1, s2 sc0 sc1 ; 4-byte Folded Spill
+; GFX940-NEXT:    s_mov_b64 exec, s[0:1]
+; GFX940-NEXT:    s_add_i32 s0, s32, 64
+; GFX940-NEXT:    v_mov_b32_e32 v0, s0
+; GFX940-NEXT:    s_add_i32 s0, s32, 0x4040
+; GFX940-NEXT:    v_writelane_b32 v1, s59, 0
+; GFX940-NEXT:    s_mov_b32 s59, s0
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use alloca0 v0
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    ;;#ASMSTART
+; GFX940-NEXT:    ; use s59
+; GFX940-NEXT:    ;;#ASMEND
+; GFX940-NEXT:    v_readlane_b32 s59, v1, 0
+; GFX940-NEXT:    s_xor_saveexec_b64 s[0:1], -1
+; GFX940-NEXT:    s_add_i32 s2, s32, 0x4044
+; GFX940-NEXT:    scratch_load_dword v1, off, s2 ; 4-byte Folded Reload
+; GFX940-NEXT:    s_mov_b64 exec, s[0:1]
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_setpc_b64 s[30:31]
+  %alloca0 = alloca [4096 x i32], align 64, addrspace(5)
+  %alloca1 = alloca i32, align 4, addrspace(5)
+  call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
+  call void asm sideeffect "; use $0", "{s59}"(ptr addrspace(5) %alloca1)
+  ret void
+}
+
+define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
+; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
+; GFX10_1:       ; %bb.0:
+; GFX10_1-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_1-NEXT:    s_mov_b32 s5, s33
+; GFX10_1-NEXT:    s_mov_b32 s33, s32
+; GFX10_1-NEXT:    s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT:    s_add_i32 s6, s33, 0x80880
+; GFX10_1-NEXT:    buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX10_1-NEXT:    s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT:    s_mov_b32 exec_lo, s4
+; GFX10_1-NEXT:    v_lshrrev_b32_e64 v0, 5, s33
+; GFX10_1-NEXT:    v_writelane_b32 v1, s59, 0
+; GFX10_1-NEXT:    s_add_i32 s32, s32, 0x81000
+; GFX10_1-NEXT:    s_and_b32 s4, 0, exec_lo
+; GFX10_1-NEXT:    v_add_nc_u32_e32 v0, 64, v0
+; GFX10_1-NEXT:    ;;#ASMSTART
+; GFX10_1-NEXT:    ; use alloca0 v0
+; GFX10_1-NEXT:    ;;#ASMEND
+; GFX10_1-NEXT:    v_lshrrev_b32_e64 v0, 5, s33
+; GFX10_1-NEXT:    v_add_nc_u32_e32 v0, 0x4040, v0
+; GFX10_1-NEXT:    v_readfirstlane_b32 s59, v0
+; GFX10_1-NEXT:    ;;#ASMSTART
+; GFX10_1-NEXT:    ; use s59, scc
+; GFX10_1-NEXT:    ;;#ASMEND
+; GFX10_1-NEXT:    v_readlane_b32 s59, v1, 0
+; GFX10_1-NEXT:    s_xor_saveexec_b32 s4, -1
+; GFX10_1-NEXT:    s_add_i32 s6, s33, 0x80880
+; GFX10_1-NEXT:    buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX10_1-NEXT:    s_waitcnt_depctr 0xffe3
+; GFX10_1-NEXT:    s_mov_b32 exec_lo, s4
+; GFX10_1-NEXT:    s_add_i32 s32, s32, 0xfff7f000
+; GFX10_1-NEXT:    s_mov_b32 s33, s5
+; GFX10_1-NEXT:    s_waitcnt vmcnt(0)
+; GFX10_1-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
+; GFX10_3:       ; %bb.0:
+; GFX10_3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_3-NEXT:    s_mov_b32 s5, s33
+; GFX10_3-NEXT:    s_mov_b32 s33, s32
+; GFX10_3...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/101306


More information about the llvm-commits mailing list