[llvm] [AMDGPU] Allocate i1 argument to SGPRs (PR #72461)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 30 18:59:33 PST 2023
================
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
+
+; Helper callee for the call test in this file: loads an i1 through an undef
+; addrspace(1) pointer and returns it, so the i1 return value is not a constant.
+define i1 @i1_func_void() #0 {
+ %val = load i1, ptr addrspace(1) undef
+ ret i1 %val
+}
+
+; Calls @i1_func_void and volatile-stores the returned i1 to an undef
+; addrspace(1) pointer. The autogenerated assertions below expect the caller
+; to read the i1 result from SGPRs right after s_swappc_b64: s[0:1] feeding
+; v_cndmask on gfx900, and s0 feeding v_cmp_ne_u32 on gfx1100.
+; The callee address is formed with gotpcrel32 @lo/@hi relocations on the
+; symbol (written sym@gotpcrel32@lo / sym@gotpcrel32@hi).
+define void @test_call_i1_func_void() #0 {
+; CIGFX89-LABEL: test_call_i1_func_void:
+; CIGFX89: ; %bb.0:
+; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CIGFX89-NEXT: s_mov_b32 s6, s33
+; CIGFX89-NEXT: s_mov_b32 s33, s32
+; CIGFX89-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; CIGFX89-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
+; CIGFX89-NEXT: s_mov_b64 exec, s[4:5]
+; CIGFX89-NEXT: s_addk_i32 s32, 0x400
+; CIGFX89-NEXT: s_getpc_b64 s[4:5]
+; CIGFX89-NEXT: s_add_u32 s4, s4, i1_func_void@gotpcrel32@lo+4
+; CIGFX89-NEXT: s_addc_u32 s5, s5, i1_func_void@gotpcrel32@hi+12
+; CIGFX89-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; CIGFX89-NEXT: v_writelane_b32 v1, s30, 0
+; CIGFX89-NEXT: v_writelane_b32 v1, s31, 1
+; CIGFX89-NEXT: s_waitcnt lgkmcnt(0)
+; CIGFX89-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; CIGFX89-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; CIGFX89-NEXT: global_store_byte v[2:3], v0, off
+; CIGFX89-NEXT: s_waitcnt vmcnt(0)
+; CIGFX89-NEXT: v_readlane_b32 s31, v1, 1
+; CIGFX89-NEXT: v_readlane_b32 s30, v1, 0
+; CIGFX89-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; CIGFX89-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload
+; CIGFX89-NEXT: s_mov_b64 exec, s[4:5]
+; CIGFX89-NEXT: s_addk_i32 s32, 0xfc00
+; CIGFX89-NEXT: s_mov_b32 s33, s6
+; CIGFX89-NEXT: s_waitcnt vmcnt(0)
+; CIGFX89-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: test_call_i1_func_void:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_mov_b32 s2, s33
+; GFX11-NEXT: s_mov_b32 s33, s32
+; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT: scratch_store_b32 off, v1, s33 ; 4-byte Folded Spill
+; GFX11-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-NEXT: s_add_i32 s32, s32, 16
+; GFX11-NEXT: s_getpc_b64 s[0:1]
+; GFX11-NEXT: s_add_u32 s0, s0, i1_func_void@gotpcrel32@lo+4
+; GFX11-NEXT: s_addc_u32 s1, s1, i1_func_void@gotpcrel32@hi+12
+; GFX11-NEXT: v_writelane_b32 v1, s30, 0
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GFX11-NEXT: v_writelane_b32 v1, s31, 1
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX11-NEXT: v_cmp_ne_u32_e64 s0, s0, 0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_readlane_b32 s31, v1, 1
+; GFX11-NEXT: v_readlane_b32 s30, v1, 0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11-NEXT: global_store_b8 v[2:3], v0, off dlc
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT: scratch_load_b32 v1, off, s33 ; 4-byte Folded Reload
+; GFX11-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-NEXT: s_add_i32 s32, s32, -16
+; GFX11-NEXT: s_mov_b32 s33, s2
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+
+ %val = call i1 @i1_func_void()
+ store volatile i1 %val, ptr addrspace(1) undef
+ ret void
+}
+
+attributes #0 = { nounwind }
----------------
arsenm wrote:
Don't need this
https://github.com/llvm/llvm-project/pull/72461
More information about the llvm-commits
mailing list