[llvm] [AMDGPU] Allocate i1 argument to SGPRs (PR #72461)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 30 18:59:33 PST 2023


================
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
+
+; Callee used by test_call_i1_func_void: loads an i1 from an undef
+; addrspace(1) pointer and returns it, so the caller receives an i1
+; return value.
+define i1 @i1_func_void() #0 {
+  %val = load i1, ptr addrspace(1) undef
+  ret i1 %val
+}
+
+define void @test_call_i1_func_void() #0 {
+; CIGFX89-LABEL: test_call_i1_func_void:
+; CIGFX89: ; %bb.0:
+; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CIGFX89-NEXT:    s_mov_b32 s6, s33
+; CIGFX89-NEXT:    s_mov_b32 s33, s32
+; CIGFX89-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; CIGFX89-NEXT:    buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
+; CIGFX89-NEXT:    s_mov_b64 exec, s[4:5]
+; CIGFX89-NEXT:    s_addk_i32 s32, 0x400
+; CIGFX89-NEXT:    s_getpc_b64 s[4:5]
+; CIGFX89-NEXT:    s_add_u32 s4, s4, i1_func_void@gotpcrel32@lo+4
+; CIGFX89-NEXT:    s_addc_u32 s5, s5, i1_func_void@gotpcrel32@hi+12
+; CIGFX89-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; CIGFX89-NEXT:    v_writelane_b32 v1, s30, 0
+; CIGFX89-NEXT:    v_writelane_b32 v1, s31, 1
+; CIGFX89-NEXT:    s_waitcnt lgkmcnt(0)
+; CIGFX89-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; CIGFX89-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; CIGFX89-NEXT:    global_store_byte v[2:3], v0, off
+; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
+; CIGFX89-NEXT:    v_readlane_b32 s31, v1, 1
+; CIGFX89-NEXT:    v_readlane_b32 s30, v1, 0
+; CIGFX89-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; CIGFX89-NEXT:    buffer_load_dword v1, off, s[0:3], s33  ; 4-byte Folded Reload
+; CIGFX89-NEXT:    s_mov_b64 exec, s[4:5]
+; CIGFX89-NEXT:    s_addk_i32 s32, 0xfc00
+; CIGFX89-NEXT:    s_mov_b32 s33, s6
+; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
+; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: test_call_i1_func_void:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_mov_b32 s2, s33
+; GFX11-NEXT:    s_mov_b32 s33, s32
+; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT:    scratch_store_b32 off, v1, s33          ; 4-byte Folded Spill
+; GFX11-NEXT:    s_mov_b32 exec_lo, s0
+; GFX11-NEXT:    s_add_i32 s32, s32, 16
+; GFX11-NEXT:    s_getpc_b64 s[0:1]
+; GFX11-NEXT:    s_add_u32 s0, s0, i1_func_void@gotpcrel32@lo+4
+; GFX11-NEXT:    s_addc_u32 s1, s1, i1_func_void@gotpcrel32@hi+12
+; GFX11-NEXT:    v_writelane_b32 v1, s30, 0
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
+; GFX11-NEXT:    v_writelane_b32 v1, s31, 1
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
+; GFX11-NEXT:    v_cmp_ne_u32_e64 s0, s0, 0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_readlane_b32 s31, v1, 1
+; GFX11-NEXT:    v_readlane_b32 s30, v1, 0
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11-NEXT:    global_store_b8 v[2:3], v0, off dlc
+; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT:    scratch_load_b32 v1, off, s33           ; 4-byte Folded Reload
+; GFX11-NEXT:    s_mov_b32 exec_lo, s0
+; GFX11-NEXT:    s_add_i32 s32, s32, -16
+; GFX11-NEXT:    s_mov_b32 s33, s2
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+
+  %val = call i1 @i1_func_void()
+  store volatile i1 %val, ptr addrspace(1) undef
----------------
arsenm wrote:

Avoid undef store 

https://github.com/llvm/llvm-project/pull/72461


More information about the llvm-commits mailing list