[llvm] [NFC][AMDGPU][GISel] Precommit GlobalISel specific tests for call instruction (PR #165898)

via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 31 11:20:55 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-globalisel

Author: Chinmay Deshpande (chinmaydd)

<details>
<summary>Changes</summary>



---
Full diff: https://github.com/llvm/llvm-project/pull/165898.diff


1 Files Affected:

- (added) llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll (+398) 


``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll
new file mode 100644
index 0000000000000..054bc9df0ee60
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll
@@ -0,0 +1,398 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
+
+declare hidden void @external_void_func_void() #0
+declare hidden void @external_void_func_i32(i32) #0
+declare hidden void @external_void_func_i32_inreg(i32 inreg) #0
+declare hidden i32 @external_i32_func_void() #0
+
+declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0
+
+declare hidden amdgpu_gfx void @external_gfx_void_func_void() #0
+declare hidden amdgpu_gfx void @external_gfx_void_func_i32(i32) #0
+declare hidden amdgpu_gfx i32 @external_gfx_i32_func_void() #0
+declare hidden amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg) #0
+declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 }) #0
+declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg) #0
+
+define amdgpu_kernel void @test_call_external_void_func_void() #0 {
+; GFX9-LABEL: test_call_external_void_func_void:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; GFX9-NEXT:    s_add_u32 s0, s0, s17
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GFX9-NEXT:    s_mov_b32 s13, s15
+; GFX9-NEXT:    s_mov_b32 s12, s14
+; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT:    s_getpc_b64 s[18:19]
+; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_void at rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_void at rel32@hi+12
+; GFX9-NEXT:    s_mov_b32 s14, s16
+; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT:    s_endpgm
+  call void @external_void_func_void()
+  ret void
+}
+
+define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
+; GFX9-LABEL: test_call_external_void_func_i32_imm:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; GFX9-NEXT:    s_add_u32 s0, s0, s17
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0
+; GFX9-NEXT:    s_add_u32 s8, s8, 8
+; GFX9-NEXT:    s_addc_u32 s9, s9, 0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GFX9-NEXT:    s_mov_b32 s13, s15
+; GFX9-NEXT:    s_mov_b32 s12, s14
+; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT:    s_getpc_b64 s[18:19]
+; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_i32 at rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_i32 at rel32@hi+12
+; GFX9-NEXT:    v_mov_b32_e32 v0, 42
+; GFX9-NEXT:    s_mov_b32 s14, s16
+; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT:    s_endpgm
+  call void @external_void_func_i32(i32 42)
+  ret void
+}
+
+define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
+; GFX9-LABEL: test_call_external_i32_func_void:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; GFX9-NEXT:    s_add_u32 s0, s0, s17
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GFX9-NEXT:    s_mov_b32 s13, s15
+; GFX9-NEXT:    s_mov_b32 s12, s14
+; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT:    s_getpc_b64 s[18:19]
+; GFX9-NEXT:    s_add_u32 s18, s18, external_i32_func_void at rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s19, s19, external_i32_func_void at rel32@hi+12
+; GFX9-NEXT:    s_mov_b32 s14, s16
+; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT:    global_store_dword v[0:1], v0, off
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_endpgm
+  %val = call i32 @external_i32_func_void()
+  store volatile i32 %val, ptr addrspace(1) poison
+  ret void
+}
+
+define amdgpu_kernel void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 {
+; GFX9-LABEL: test_call_external_void_func_i32_inreg:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s17, s33
+; GFX9-NEXT:    s_mov_b32 s33, s32
+; GFX9-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_mov_b64 exec, s[18:19]
+; GFX9-NEXT:    v_writelane_b32 v40, s17, 2
+; GFX9-NEXT:    s_addk_i32 s32, 0x400
+; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
+; GFX9-NEXT:    s_getpc_b64 s[18:19]
+; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_i32_inreg at rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_i32_inreg at rel32@hi+12
+; GFX9-NEXT:    s_mov_b32 s0, s16
+; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
+; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
+; GFX9-NEXT:    s_mov_b32 s32, s33
+; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
+; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
+; GFX9-NEXT:    s_mov_b32 s33, s4
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  call void @external_void_func_i32_inreg(i32 inreg %arg)
+  ret void
+}
+
+define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
+; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; GFX9-NEXT:    s_add_u32 s0, s0, s17
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0
+; GFX9-NEXT:    v_mov_b32_e32 v3, 3
+; GFX9-NEXT:    buffer_store_byte v3, off, s[0:3], 0
+; GFX9-NEXT:    v_mov_b32_e32 v3, 8
+; GFX9-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:4
+; GFX9-NEXT:    buffer_load_dword v3, off, s[0:3], 0
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    buffer_load_dword v4, off, s[0:3], 0 offset:4
+; GFX9-NEXT:    s_add_u32 s8, s8, 8
+; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GFX9-NEXT:    s_addc_u32 s9, s9, 0
+; GFX9-NEXT:    s_movk_i32 s32, 0x800
+; GFX9-NEXT:    s_mov_b32 s13, s15
+; GFX9-NEXT:    s_mov_b32 s12, s14
+; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT:    s_getpc_b64 s[18:19]
+; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32 at rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32 at rel32@hi+12
+; GFX9-NEXT:    v_mov_b32_e32 v0, 8
+; GFX9-NEXT:    s_mov_b32 s14, s16
+; GFX9-NEXT:    s_waitcnt vmcnt(1)
+; GFX9-NEXT:    buffer_store_dword v3, off, s[0:3], s32
+; GFX9-NEXT:    s_waitcnt vmcnt(1)
+; GFX9-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:4
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 offset:8
+; GFX9-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:12
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    global_store_byte v[0:1], v0, off
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    global_store_dword v[0:1], v1, off
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_endpgm
+  %in.val = alloca { i8, i32 }, align 4, addrspace(5)
+  %out.val = alloca { i8, i32 }, align 4, addrspace(5)
+  %in.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 0
+  %in.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 1
+  store i8 3, ptr addrspace(5) %in.gep0
+  store i32 8, ptr addrspace(5) %in.gep1
+  call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) %out.val, ptr addrspace(5) %in.val)
+  %out.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 0
+  %out.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 1
+  %out.val0 = load i8, ptr addrspace(5) %out.gep0
+  %out.val1 = load i32, ptr addrspace(5) %out.gep1
+  store volatile i8 %out.val0, ptr addrspace(1) poison
+  store volatile i32 %out.val1, ptr addrspace(1) poison
+  ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
+; GFX9-LABEL: test_gfx_call_external_void_func_void:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s34, s33
+; GFX9-NEXT:    s_mov_b32 s33, s32
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    v_writelane_b32 v40, s34, 2
+; GFX9-NEXT:    s_addk_i32 s32, 0x400
+; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
+; GFX9-NEXT:    s_getpc_b64 s[34:35]
+; GFX9-NEXT:    s_add_u32 s34, s34, external_gfx_void_func_void at rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s35, s35, external_gfx_void_func_void at rel32@hi+12
+; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
+; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
+; GFX9-NEXT:    s_mov_b32 s32, s33
+; GFX9-NEXT:    v_readlane_b32 s34, v40, 2
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    s_mov_b32 s33, s34
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  call amdgpu_gfx void @external_gfx_void_func_void()
+  ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
+; GFX9-LABEL: test_gfx_call_external_void_func_i32_imm:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s34, s33
+; GFX9-NEXT:    s_mov_b32 s33, s32
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    v_writelane_b32 v40, s34, 2
+; GFX9-NEXT:    s_addk_i32 s32, 0x400
+; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
+; GFX9-NEXT:    s_getpc_b64 s[34:35]
+; GFX9-NEXT:    s_add_u32 s34, s34, external_gfx_void_func_i32 at rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s35, s35, external_gfx_void_func_i32 at rel32@hi+12
+; GFX9-NEXT:    v_mov_b32_e32 v0, 42
+; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
+; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
+; GFX9-NEXT:    s_mov_b32 s32, s33
+; GFX9-NEXT:    v_readlane_b32 s34, v40, 2
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    s_mov_b32 s33, s34
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  call amdgpu_gfx void @external_gfx_void_func_i32(i32 42)
+  ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 {
+; GFX9-LABEL: test_gfx_call_external_i32_func_void:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s34, s33
+; GFX9-NEXT:    s_mov_b32 s33, s32
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    v_writelane_b32 v40, s34, 2
+; GFX9-NEXT:    s_addk_i32 s32, 0x400
+; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
+; GFX9-NEXT:    s_getpc_b64 s[34:35]
+; GFX9-NEXT:    s_add_u32 s34, s34, external_gfx_i32_func_void at rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s35, s35, external_gfx_i32_func_void at rel32@hi+12
+; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT:    global_store_dword v[0:1], v0, off
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
+; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
+; GFX9-NEXT:    s_mov_b32 s32, s33
+; GFX9-NEXT:    v_readlane_b32 s34, v40, 2
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    s_mov_b32 s33, s34
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %val = call amdgpu_gfx i32 @external_gfx_i32_func_void()
+  store volatile i32 %val, ptr addrspace(1) poison
+  ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) #0 {
+; GFX9-LABEL: test_gfx_call_external_void_func_i32_imm_inreg:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s34, s33
+; GFX9-NEXT:    s_mov_b32 s33, s32
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    v_writelane_b32 v40, s34, 3
+; GFX9-NEXT:    s_addk_i32 s32, 0x400
+; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
+; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
+; GFX9-NEXT:    s_getpc_b64 s[34:35]
+; GFX9-NEXT:    s_add_u32 s34, s34, external_gfx_void_func_i32_inreg at rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s35, s35, external_gfx_void_func_i32_inreg at rel32@hi+12
+; GFX9-NEXT:    s_mov_b32 s4, 42
+; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
+; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
+; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
+; GFX9-NEXT:    s_mov_b32 s32, s33
+; GFX9-NEXT:    v_readlane_b32 s34, v40, 3
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    s_mov_b32 s33, s34
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42)
+  ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
+; GFX9-LABEL: test_gfx_call_external_void_func_struct_i8_i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s34, s33
+; GFX9-NEXT:    s_mov_b32 s33, s32
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    v_writelane_b32 v40, s34, 2
+; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_addk_i32 s32, 0x400
+; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_ubyte v0, v2, s[34:35]
+; GFX9-NEXT:    global_load_dword v1, v2, s[34:35] offset:4
+; GFX9-NEXT:    s_getpc_b64 s[34:35]
+; GFX9-NEXT:    s_add_u32 s34, s34, external_gfx_void_func_struct_i8_i32 at rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s35, s35, external_gfx_void_func_struct_i8_i32 at rel32@hi+12
+; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
+; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
+; GFX9-NEXT:    s_mov_b32 s32, s33
+; GFX9-NEXT:    v_readlane_b32 s34, v40, 2
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    s_mov_b32 s33, s34
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
+  %val = load { i8, i32 }, ptr addrspace(1) %ptr0
+  call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val)
+  ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #0 {
+; GFX9-LABEL: test_gfx_call_external_void_func_struct_i8_i32_inreg:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s34, s33
+; GFX9-NEXT:    s_mov_b32 s33, s32
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    v_writelane_b32 v40, s34, 4
+; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
+; GFX9-NEXT:    s_addk_i32 s32, 0x400
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_ubyte v1, v0, s[34:35]
+; GFX9-NEXT:    global_load_dword v2, v0, s[34:35] offset:4
+; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
+; GFX9-NEXT:    v_writelane_b32 v40, s30, 2
+; GFX9-NEXT:    s_getpc_b64 s[34:35]
+; GFX9-NEXT:    s_add_u32 s34, s34, external_gfx_void_func_struct_i8_i32_inreg at rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s35, s35, external_gfx_void_func_struct_i8_i32_inreg at rel32@hi+12
+; GFX9-NEXT:    v_writelane_b32 v40, s31, 3
+; GFX9-NEXT:    s_waitcnt vmcnt(1)
+; GFX9-NEXT:    v_readfirstlane_b32 s4, v1
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_readfirstlane_b32 s5, v2
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT:    v_readlane_b32 s31, v40, 3
+; GFX9-NEXT:    v_readlane_b32 s30, v40, 2
+; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
+; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
+; GFX9-NEXT:    s_mov_b32 s32, s33
+; GFX9-NEXT:    v_readlane_b32 s34, v40, 4
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    s_mov_b32 s33, s34
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
+  %val = load { i8, i32 }, ptr addrspace(1) %ptr0
+  call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val)
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind noinline }

``````````

</details>


https://github.com/llvm/llvm-project/pull/165898


More information about the llvm-commits mailing list