[llvm] [AMDGPU] Allocate i1 argument to SGPRs (PR #72461)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 22 04:51:25 PDT 2024


================
@@ -0,0 +1,819 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
+
+define void @void_func_i1(i1 %arg0) {
+; GFX9-LABEL: void_func_i1:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX9-NEXT:    global_store_byte v[0:1], v0, off
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: void_func_i1:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  store i1 %arg0, ptr addrspace(1) undef
+  ret void
+}
+
+define void @test_call_void_func_i1() {
+; GFX9-LABEL: test_call_void_func_i1:
+; GFX9:    ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s8, s33
+; GFX9-NEXT:    s_mov_b32 s33, s32
+; GFX9-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX9-NEXT:    buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
+; GFX9-NEXT:    s_addk_i32 s32, 0x400
+; GFX9-NEXT:    s_getpc_b64 s[4:5]
+; GFX9-NEXT:    s_add_u32 s4, s4, void_func_i1 at gotpcrel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s5, s5, void_func_i1 at gotpcrel32@hi+12
+; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x0
+; GFX9-NEXT:    v_writelane_b32 v2, s30, 0
+; GFX9-NEXT:    v_writelane_b32 v2, s31, 1
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+;
+; GFX11-LABEL: test_call_void_func_i1:
+; GFX11:     ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_mov_b32 s4, s33
+; GFX11-NEXT:    s_mov_b32 s33, s32
+; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT:    scratch_store_b32 off, v2, s33          ; 4-byte Folded Spill
+; GFX11-NEXT:    s_mov_b32 exec_lo, s0
+; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
+; GFX11-NEXT:    s_add_i32 s32, s32, 16
+; GFX11-NEXT:    s_getpc_b64 s[0:1]
+; GFX11-NEXT:    s_add_u32 s0, s0, void_func_i1 at gotpcrel32@lo+4
+; GFX11-NEXT:    s_addc_u32 s1, s1, void_func_i1 at gotpcrel32@hi+12
+; GFX11-NEXT:    v_writelane_b32 v2, s30, 0
+; GFX11-NEXT:    s_load_b64 s[2:3], s[0:1], 0x0
+; GFX11-NEXT:    v_writelane_b32 v2, s31, 1
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 1, v0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
+  %val = load i1, ptr addrspace(1) undef
+  call void @void_func_i1(i1 %val)
+  ret void
+}
+
+define void @void_func_i1_zeroext(i1 zeroext %arg0) {
+; GFX9-LABEL: void_func_i1_zeroext:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX9-NEXT:    v_or_b32_e32 v0, 12, v0
+; GFX9-NEXT:    global_store_dword v[0:1], v0, off
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: void_func_i1_zeroext:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_or_b32_e32 v0, 12, v0
+; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %ext = zext i1 %arg0 to i32
+  %add = add i32 %ext, 12
+  store i32 %add, ptr addrspace(1) undef
+  ret void
+}
+
+define void @test_call_void_func_i1_zeroext() {
+; GFX9-LABEL: test_call_void_func_i1_zeroext:
+; GFX9:    ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s8, s33
+; GFX9-NEXT:    s_mov_b32 s33, s32
+; GFX9-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX9-NEXT:    buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
+; GFX9-NEXT:    s_addk_i32 s32, 0x400
+; GFX9-NEXT:    s_getpc_b64 s[4:5]
+; GFX9-NEXT:    s_add_u32 s4, s4, void_func_i1_zeroext at gotpcrel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s5, s5, void_func_i1_zeroext at gotpcrel32@hi+12
+; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x0
+; GFX9-NEXT:    v_writelane_b32 v2, s30, 0
+; GFX9-NEXT:    v_writelane_b32 v2, s31, 1
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+;
+; GFX11-LABEL: test_call_void_func_i1_zeroext:
+; GFX11:     ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_mov_b32 s4, s33
+; GFX11-NEXT:    s_mov_b32 s33, s32
+; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT:    scratch_store_b32 off, v2, s33          ; 4-byte Folded Spill
+; GFX11-NEXT:    s_mov_b32 exec_lo, s0
+; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
+; GFX11-NEXT:    s_add_i32 s32, s32, 16
+; GFX11-NEXT:    s_getpc_b64 s[0:1]
+; GFX11-NEXT:    s_add_u32 s0, s0, void_func_i1_zeroext at gotpcrel32@lo+4
+; GFX11-NEXT:    s_addc_u32 s1, s1, void_func_i1_zeroext at gotpcrel32@hi+12
+; GFX11-NEXT:    v_writelane_b32 v2, s30, 0
+; GFX11-NEXT:    s_load_b64 s[2:3], s[0:1], 0x0
+; GFX11-NEXT:    v_writelane_b32 v2, s31, 1
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 1, v0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
+  %val = load i1, ptr addrspace(1) undef
+  call void @void_func_i1_zeroext(i1 %val)
+  ret void
+}
+
+define void @void_func_i1_signext(i1 signext %arg0) {
+; GFX9-LABEL: void_func_i1_signext:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX9-NEXT:    v_sub_u32_e32 v0, 12, v0
+; GFX9-NEXT:    global_store_dword v[0:1], v0, off
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: void_func_i1_signext:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_sub_nc_u32_e32 v0, 12, v0
+; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %ext = sext i1 %arg0 to i32
+  %add = add i32 %ext, 12
+  store i32 %add, ptr addrspace(1) undef
+  ret void
+}
+
+define void @test_call_void_func_i1_signext() {
+; GFX9-LABEL: test_call_void_func_i1_signext:
+; GFX9:    ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s8, s33
+; GFX9-NEXT:    s_mov_b32 s33, s32
+; GFX9-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX9-NEXT:    buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
+; GFX9-NEXT:    s_addk_i32 s32, 0x400
+; GFX9-NEXT:    s_getpc_b64 s[4:5]
+; GFX9-NEXT:    s_add_u32 s4, s4, void_func_i1_signext at gotpcrel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s5, s5, void_func_i1_signext at gotpcrel32@hi+12
+; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x0
+; GFX9-NEXT:    v_writelane_b32 v2, s30, 0
+; GFX9-NEXT:    v_writelane_b32 v2, s31, 1
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+;
+; GFX11-LABEL: test_call_void_func_i1_signext:
+; GFX11:     ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_mov_b32 s4, s33
+; GFX11-NEXT:    s_mov_b32 s33, s32
+; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT:    scratch_store_b32 off, v2, s33          ; 4-byte Folded Spill
+; GFX11-NEXT:    s_mov_b32 exec_lo, s0
+; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
+; GFX11-NEXT:    s_add_i32 s32, s32, 16
+; GFX11-NEXT:    s_getpc_b64 s[0:1]
+; GFX11-NEXT:    s_add_u32 s0, s0, void_func_i1_signext at gotpcrel32@lo+4
+; GFX11-NEXT:    s_addc_u32 s1, s1, void_func_i1_signext at gotpcrel32@hi+12
+; GFX11-NEXT:    v_writelane_b32 v2, s30, 0
+; GFX11-NEXT:    s_load_b64 s[2:3], s[0:1], 0x0
+; GFX11-NEXT:    v_writelane_b32 v2, s31, 1
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 1, v0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
+  %val = load i1, ptr addrspace(1) undef
+  call void @void_func_i1_signext(i1 %val)
+  ret void
+}
+
+define void @void_func_a2i1([2 x i1] %arg0) {
+; GFX9-LABEL: void_func_a2i1:
+; GFX9:        ; %bb.0:
+; GFX9-NEXT:     s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:     v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX9-NEXT:     v_cndmask_b32_e64 v1, 0, 1, s[6:7]
+; GFX9-NEXT:     global_store_byte v[0:1], v1, off
+; GFX9-NEXT:     global_store_byte v[0:1], v0, off
+; GFX9-NEXT:     s_waitcnt vmcnt(0)
+; GFX9-NEXT:     s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: void_func_a2i1:
+; GFX11:        ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s1
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    global_store_b8 v[0:1], v1, off
+; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  store [2 x i1] %arg0, ptr addrspace(1) undef
+  ret void
+}
+
+define void @test_call_void_func_a2i1() {
+; GFX9-LABEL: test_call_void_func_a2i1:
+; GFX9:    ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s10, s33
+; GFX9-NEXT:    s_mov_b32 s33, s32
+; GFX9-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GFX9-NEXT:    buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX9-NEXT:    s_addk_i32 s32, 0x400
+; GFX9-NEXT:    s_getpc_b64 s[4:5]
+; GFX9-NEXT:    s_add_u32 s4, s4, void_func_a2i1 at gotpcrel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s5, s5, void_func_a2i1 at gotpcrel32@hi+12
+; GFX9-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
+; GFX9-NEXT:    v_writelane_b32 v2, s30, 0
+; GFX9-NEXT:    s_mov_b64 s[4:5], 0
+; GFX9-NEXT:    s_mov_b64 s[6:7], -1
+; GFX9-NEXT:    v_writelane_b32 v2, s31, 1
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+;
+; GFX11-LABEL: test_call_void_func_a2i1:
+; GFX11:     ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT;    s_mov_b32 s4, s33
+; GFX11-NEXT;    s_mov_b32 s33, s32
+; GFX11-NEXT;    s_xor_saveexec_b32 s0, -1
+; GFX11-NEXT;    scratch_store_b32 off, v2, s33          ; 4-byte Folded Spill
+; GFX11-NEXT;    s_mov_b32 exec_lo, s0
+; GFX11-NEXT;    s_add_i32 s32, s32, 16
+; GFX11-NEXT;    s_getpc_b64 s[0:1]
+; GFX11-NEXT;    s_add_u32 s0, s0, void_func_a2i1 at gotpcrel32@lo+4
+; GFX11-NEXT;    s_addc_u32 s1, s1, void_func_a2i1 at gotpcrel32@hi+12
+; GFX11-NEXT;    v_writelane_b32 v2, s30, 0
+; GFX11-NEXT;    s_load_b64 s[2:3], s[0:1], 0x0
+; GFX11-NEXT;    s_mov_b32 s0, 0
+; GFX11-NEXT;    s_mov_b32 s1, -1
+; GFX11-NEXT;    v_writelane_b32 v2, s31, 1
+; GFX11-NEXT;    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT;    s_swappc_b64 s[30:31], s[2:3]
+  %1 = insertvalue [2 x i1] undef, i1 0, 0
+  %2 = insertvalue [2 x i1] %1, i1 1, 1
+  call void @void_func_a2i1([2 x i1] %2)
+  ret void
+}
+
+define void @i1_arg_i1_use(i1 %arg) {
+; CIGFX89-LABEL: i1_arg_i1_use:
+; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
+; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
+; GFX9:        ; %bb.0:
+; GFX9-NEXT:     s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:     s_xor_b64 s[6:7], s[4:5], -1
+; GFX9-NEXT:     s_and_saveexec_b64 s[4:5], s[6:7]
+; GFX9:        ; %bb.1:
+; GFX9-NEXT:     v_mov_b32_e32 v0, 0
+; GFX9-NEXT:     global_store_dword v[0:1], v0, off
+; GFX9-NEXT:     s_waitcnt vmcnt(0)
+; GFX9-NEXT:   .LBB{{[0-9]+}}_2:
+; GFX9-NEXT:     s_or_b64 exec, exec, s[4:5]
+; GFX9-NEXT:     s_waitcnt vmcnt(0)
+; GFX9-NEXT:     s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: i1_arg_i1_use:
+; GFX11:       ; %bb.0: ; %bb
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_xor_b32 s1, s0, -1
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    s_and_saveexec_b32 s0, s1
+; GFX11:       ; %bb.1: ; %bb1
+; GFX11-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-NEXT:    global_store_b32 v[0:1], v0, off dlc
+; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT:  .LBB{{[0-9]+}}_2: ; %bb2
+; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+bb:
+  br i1 %arg, label %bb2, label %bb1
+
+bb1:
+  store volatile i32 0, ptr addrspace(1) undef
+  br label %bb2
+
+bb2:
+  ret void
+}
+
+define void @void_func_v2i1(<2 x i1> %arg0) {
----------------
arsenm wrote:

add an inreg vector i1 test 

https://github.com/llvm/llvm-project/pull/72461


More information about the llvm-commits mailing list