[llvm] [NFC][AMDGPU][GISel] Precommit GlobalISel specific tests for call instruction (PR #165898)
Chinmay Deshpande via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 31 11:20:14 PDT 2025
https://github.com/chinmaydd created https://github.com/llvm/llvm-project/pull/165898
>From f6f275596e0e3ee817987d2737b4cd7a7bd7e59a Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande <ChinmayDiwakar.Deshpande at amd.com>
Date: Fri, 31 Oct 2025 14:19:12 -0400
Subject: [PATCH] [NFC][AMDGPU][GISel] Precommit GlobalISel specific tests for
call instruction
---
llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll | 398 ++++++++++++++++++++
1 file changed, 398 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll
new file mode 100644
index 0000000000000..054bc9df0ee60
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll
@@ -0,0 +1,398 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
+
+declare hidden void @external_void_func_void() #0
+declare hidden void @external_void_func_i32(i32) #0
+declare hidden void @external_void_func_i32_inreg(i32 inreg) #0
+declare hidden i32 @external_i32_func_void() #0
+
+declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0
+
+declare hidden amdgpu_gfx void @external_gfx_void_func_void() #0
+declare hidden amdgpu_gfx void @external_gfx_void_func_i32(i32) #0
+declare hidden amdgpu_gfx i32 @external_gfx_i32_func_void() #0
+declare hidden amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg) #0
+declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 }) #0
+declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg) #0
+
+define amdgpu_kernel void @test_call_external_void_func_void() #0 {
+; GFX9-LABEL: test_call_external_void_func_void:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
+; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
+; GFX9-NEXT: s_add_u32 s0, s0, s17
+; GFX9-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT: s_getpc_b64 s[18:19]
+; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_void@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_void@rel32@hi+12
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT: s_endpgm
+ call void @external_void_func_void()
+ ret void
+}
+
+define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
+; GFX9-LABEL: test_call_external_void_func_i32_imm:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
+; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
+; GFX9-NEXT: s_add_u32 s0, s0, s17
+; GFX9-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-NEXT: s_add_u32 s8, s8, 8
+; GFX9-NEXT: s_addc_u32 s9, s9, 0
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT: s_getpc_b64 s[18:19]
+; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i32@rel32@hi+12
+; GFX9-NEXT: v_mov_b32_e32 v0, 42
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT: s_endpgm
+ call void @external_void_func_i32(i32 42)
+ ret void
+}
+
+define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
+; GFX9-LABEL: test_call_external_i32_func_void:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
+; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
+; GFX9-NEXT: s_add_u32 s0, s0, s17
+; GFX9-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT: s_getpc_b64 s[18:19]
+; GFX9-NEXT: s_add_u32 s18, s18, external_i32_func_void@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s19, s19, external_i32_func_void@rel32@hi+12
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT: global_store_dword v[0:1], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_endpgm
+ %val = call i32 @external_i32_func_void()
+ store volatile i32 %val, ptr addrspace(1) poison
+ ret void
+}
+
+define amdgpu_kernel void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 {
+; GFX9-LABEL: test_call_external_void_func_i32_inreg:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s17, s33
+; GFX9-NEXT: s_mov_b32 s33, s32
+; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
+; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[18:19]
+; GFX9-NEXT: v_writelane_b32 v40, s17, 2
+; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: s_getpc_b64 s[18:19]
+; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_mov_b32 s0, s16
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_mov_b32 s32, s33
+; GFX9-NEXT: v_readlane_b32 s4, v40, 2
+; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b32 s33, s4
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ call void @external_void_func_i32_inreg(i32 inreg %arg)
+ ret void
+}
+
+define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
+; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
+; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
+; GFX9-NEXT: s_add_u32 s0, s0, s17
+; GFX9-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-NEXT: v_mov_b32_e32 v3, 3
+; GFX9-NEXT: buffer_store_byte v3, off, s[0:3], 0
+; GFX9-NEXT: v_mov_b32_e32 v3, 8
+; GFX9-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], 0
+; GFX9-NEXT: s_nop 0
+; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:4
+; GFX9-NEXT: s_add_u32 s8, s8, 8
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; GFX9-NEXT: s_addc_u32 s9, s9, 0
+; GFX9-NEXT: s_movk_i32 s32, 0x800
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT: s_getpc_b64 s[18:19]
+; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
+; GFX9-NEXT: v_mov_b32_e32 v0, 8
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_waitcnt vmcnt(1)
+; GFX9-NEXT: buffer_store_dword v3, off, s[0:3], s32
+; GFX9-NEXT: s_waitcnt vmcnt(1)
+; GFX9-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:4
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:8
+; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:12
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_byte v[0:1], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: global_store_dword v[0:1], v1, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_endpgm
+ %in.val = alloca { i8, i32 }, align 4, addrspace(5)
+ %out.val = alloca { i8, i32 }, align 4, addrspace(5)
+ %in.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 0
+ %in.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 1
+ store i8 3, ptr addrspace(5) %in.gep0
+ store i32 8, ptr addrspace(5) %in.gep1
+ call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) %out.val, ptr addrspace(5) %in.val)
+ %out.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 0
+ %out.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 1
+ %out.val0 = load i8, ptr addrspace(5) %out.gep0
+ %out.val1 = load i32, ptr addrspace(5) %out.gep1
+ store volatile i8 %out.val0, ptr addrspace(1) poison
+ store volatile i32 %out.val1, ptr addrspace(1) poison
+ ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
+; GFX9-LABEL: test_gfx_call_external_void_func_void:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s34, s33
+; GFX9-NEXT: s_mov_b32 s33, s32
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: v_writelane_b32 v40, s34, 2
+; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_gfx_void_func_void@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_gfx_void_func_void@rel32@hi+12
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_mov_b32 s32, s33
+; GFX9-NEXT: v_readlane_b32 s34, v40, 2
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: s_mov_b32 s33, s34
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ call amdgpu_gfx void @external_gfx_void_func_void()
+ ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
+; GFX9-LABEL: test_gfx_call_external_void_func_i32_imm:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s34, s33
+; GFX9-NEXT: s_mov_b32 s33, s32
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: v_writelane_b32 v40, s34, 2
+; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_gfx_void_func_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_gfx_void_func_i32@rel32@hi+12
+; GFX9-NEXT: v_mov_b32_e32 v0, 42
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_mov_b32 s32, s33
+; GFX9-NEXT: v_readlane_b32 s34, v40, 2
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: s_mov_b32 s33, s34
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ call amdgpu_gfx void @external_gfx_void_func_i32(i32 42)
+ ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 {
+; GFX9-LABEL: test_gfx_call_external_i32_func_void:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s34, s33
+; GFX9-NEXT: s_mov_b32 s33, s32
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: v_writelane_b32 v40, s34, 2
+; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_gfx_i32_func_void@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_gfx_i32_func_void@rel32@hi+12
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: global_store_dword v[0:1], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_mov_b32 s32, s33
+; GFX9-NEXT: v_readlane_b32 s34, v40, 2
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: s_mov_b32 s33, s34
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %val = call amdgpu_gfx i32 @external_gfx_i32_func_void()
+ store volatile i32 %val, ptr addrspace(1) poison
+ ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) #0 {
+; GFX9-LABEL: test_gfx_call_external_void_func_i32_imm_inreg:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s34, s33
+; GFX9-NEXT: s_mov_b32 s33, s32
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: v_writelane_b32 v40, s34, 3
+; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 1
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_gfx_void_func_i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_gfx_void_func_i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_mov_b32 s4, 42
+; GFX9-NEXT: v_writelane_b32 v40, s31, 2
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_readlane_b32 s31, v40, 2
+; GFX9-NEXT: v_readlane_b32 s30, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: s_mov_b32 s32, s33
+; GFX9-NEXT: v_readlane_b32 s34, v40, 3
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: s_mov_b32 s33, s34
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42)
+ ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
+; GFX9-LABEL: test_gfx_call_external_void_func_struct_i8_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s34, s33
+; GFX9-NEXT: s_mov_b32 s33, s32
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: v_writelane_b32 v40, s34, 2
+; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_ubyte v0, v2, s[34:35]
+; GFX9-NEXT: global_load_dword v1, v2, s[34:35] offset:4
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_gfx_void_func_struct_i8_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_gfx_void_func_struct_i8_i32@rel32@hi+12
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_mov_b32 s32, s33
+; GFX9-NEXT: v_readlane_b32 s34, v40, 2
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: s_mov_b32 s33, s34
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
+ %val = load { i8, i32 }, ptr addrspace(1) %ptr0
+ call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val)
+ ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #0 {
+; GFX9-LABEL: test_gfx_call_external_void_func_struct_i8_i32_inreg:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s34, s33
+; GFX9-NEXT: s_mov_b32 s33, s32
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: v_writelane_b32 v40, s34, 4
+; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_ubyte v1, v0, s[34:35]
+; GFX9-NEXT: global_load_dword v2, v0, s[34:35] offset:4
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_gfx_void_func_struct_i8_i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_gfx_void_func_struct_i8_i32_inreg@rel32@hi+12
+; GFX9-NEXT: v_writelane_b32 v40, s31, 3
+; GFX9-NEXT: s_waitcnt vmcnt(1)
+; GFX9-NEXT: v_readfirstlane_b32 s4, v1
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_readfirstlane_b32 s5, v2
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_readlane_b32 s31, v40, 3
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: s_mov_b32 s32, s33
+; GFX9-NEXT: v_readlane_b32 s34, v40, 4
+; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[36:37]
+; GFX9-NEXT: s_mov_b32 s33, s34
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
+ %val = load { i8, i32 }, ptr addrspace(1) %ptr0
+ call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val)
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind noinline }