[llvm] [NFC][AMDGPU][GISel] Precommit GlobalISel specific tests for call instruction (PR #165898)

Chinmay Deshpande via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 6 09:57:32 PST 2025


https://github.com/chinmaydd updated https://github.com/llvm/llvm-project/pull/165898

>From 033db646960b7a01460a5264308a28272c4ec26d Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande <ChinmayDiwakar.Deshpande at amd.com>
Date: Fri, 31 Oct 2025 14:19:12 -0400
Subject: [PATCH 1/4] [NFC][AMDGPU][GISel] Precommit GlobalISel specific tests
 for call instruction

---
 llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll | 398 ++++++++++++++++++++
 1 file changed, 398 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll
new file mode 100644
index 0000000000000..054bc9df0ee60
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll
@@ -0,0 +1,398 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
+
+declare hidden void @external_void_func_void() #0
+declare hidden void @external_void_func_i32(i32) #0
+declare hidden void @external_void_func_i32_inreg(i32 inreg) #0
+declare hidden i32 @external_i32_func_void() #0
+
+declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0
+
+declare hidden amdgpu_gfx void @external_gfx_void_func_void() #0
+declare hidden amdgpu_gfx void @external_gfx_void_func_i32(i32) #0
+declare hidden amdgpu_gfx i32 @external_gfx_i32_func_void() #0
+declare hidden amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg) #0
+declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 }) #0
+declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg) #0
+
+define amdgpu_kernel void @test_call_external_void_func_void() #0 {
+; GFX9-LABEL: test_call_external_void_func_void:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; GFX9-NEXT:    s_add_u32 s0, s0, s17
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GFX9-NEXT:    s_mov_b32 s13, s15
+; GFX9-NEXT:    s_mov_b32 s12, s14
+; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT:    s_getpc_b64 s[18:19]
+; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_void@rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_void@rel32@hi+12
+; GFX9-NEXT:    s_mov_b32 s14, s16
+; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT:    s_endpgm
+  call void @external_void_func_void()
+  ret void
+}
+
+define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
+; GFX9-LABEL: test_call_external_void_func_i32_imm:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; GFX9-NEXT:    s_add_u32 s0, s0, s17
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0
+; GFX9-NEXT:    s_add_u32 s8, s8, 8
+; GFX9-NEXT:    s_addc_u32 s9, s9, 0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GFX9-NEXT:    s_mov_b32 s13, s15
+; GFX9-NEXT:    s_mov_b32 s12, s14
+; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT:    s_getpc_b64 s[18:19]
+; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_i32@rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_i32@rel32@hi+12
+; GFX9-NEXT:    v_mov_b32_e32 v0, 42
+; GFX9-NEXT:    s_mov_b32 s14, s16
+; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT:    s_endpgm
+  call void @external_void_func_i32(i32 42)
+  ret void
+}
+
+define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
+; GFX9-LABEL: test_call_external_i32_func_void:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; GFX9-NEXT:    s_add_u32 s0, s0, s17
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GFX9-NEXT:    s_mov_b32 s13, s15
+; GFX9-NEXT:    s_mov_b32 s12, s14
+; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT:    s_getpc_b64 s[18:19]
+; GFX9-NEXT:    s_add_u32 s18, s18, external_i32_func_void@rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s19, s19, external_i32_func_void@rel32@hi+12
+; GFX9-NEXT:    s_mov_b32 s14, s16
+; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT:    global_store_dword v[0:1], v0, off
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_endpgm
+  %val = call i32 @external_i32_func_void()
+  store volatile i32 %val, ptr addrspace(1) poison
+  ret void
+}
+
+define amdgpu_kernel void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 {
+; GFX9-LABEL: test_call_external_void_func_i32_inreg:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s17, s33
+; GFX9-NEXT:    s_mov_b32 s33, s32
+; GFX9-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_mov_b64 exec, s[18:19]
+; GFX9-NEXT:    v_writelane_b32 v40, s17, 2
+; GFX9-NEXT:    s_addk_i32 s32, 0x400
+; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
+; GFX9-NEXT:    s_getpc_b64 s[18:19]
+; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_i32_inreg@rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_i32_inreg@rel32@hi+12
+; GFX9-NEXT:    s_mov_b32 s0, s16
+; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
+; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
+; GFX9-NEXT:    s_mov_b32 s32, s33
+; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
+; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
+; GFX9-NEXT:    s_mov_b32 s33, s4
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  call void @external_void_func_i32_inreg(i32 inreg %arg)
+  ret void
+}
+
+define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
+; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; GFX9-NEXT:    s_add_u32 s0, s0, s17
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0
+; GFX9-NEXT:    v_mov_b32_e32 v3, 3
+; GFX9-NEXT:    buffer_store_byte v3, off, s[0:3], 0
+; GFX9-NEXT:    v_mov_b32_e32 v3, 8
+; GFX9-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:4
+; GFX9-NEXT:    buffer_load_dword v3, off, s[0:3], 0
+; GFX9-NEXT:    s_nop 0
+; GFX9-NEXT:    buffer_load_dword v4, off, s[0:3], 0 offset:4
+; GFX9-NEXT:    s_add_u32 s8, s8, 8
+; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GFX9-NEXT:    s_addc_u32 s9, s9, 0
+; GFX9-NEXT:    s_movk_i32 s32, 0x800
+; GFX9-NEXT:    s_mov_b32 s13, s15
+; GFX9-NEXT:    s_mov_b32 s12, s14
+; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
+; GFX9-NEXT:    s_getpc_b64 s[18:19]
+; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
+; GFX9-NEXT:    v_mov_b32_e32 v0, 8
+; GFX9-NEXT:    s_mov_b32 s14, s16
+; GFX9-NEXT:    s_waitcnt vmcnt(1)
+; GFX9-NEXT:    buffer_store_dword v3, off, s[0:3], s32
+; GFX9-NEXT:    s_waitcnt vmcnt(1)
+; GFX9-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:4
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 offset:8
+; GFX9-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:12
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    global_store_byte v[0:1], v0, off
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    global_store_dword v[0:1], v1, off
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_endpgm
+  %in.val = alloca { i8, i32 }, align 4, addrspace(5)
+  %out.val = alloca { i8, i32 }, align 4, addrspace(5)
+  %in.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 0
+  %in.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 1
+  store i8 3, ptr addrspace(5) %in.gep0
+  store i32 8, ptr addrspace(5) %in.gep1
+  call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) %out.val, ptr addrspace(5) %in.val)
+  %out.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 0
+  %out.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 1
+  %out.val0 = load i8, ptr addrspace(5) %out.gep0
+  %out.val1 = load i32, ptr addrspace(5) %out.gep1
+  store volatile i8 %out.val0, ptr addrspace(1) poison
+  store volatile i32 %out.val1, ptr addrspace(1) poison
+  ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
+; GFX9-LABEL: test_gfx_call_external_void_func_void:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s34, s33
+; GFX9-NEXT:    s_mov_b32 s33, s32
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    v_writelane_b32 v40, s34, 2
+; GFX9-NEXT:    s_addk_i32 s32, 0x400
+; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
+; GFX9-NEXT:    s_getpc_b64 s[34:35]
+; GFX9-NEXT:    s_add_u32 s34, s34, external_gfx_void_func_void@rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s35, s35, external_gfx_void_func_void@rel32@hi+12
+; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
+; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
+; GFX9-NEXT:    s_mov_b32 s32, s33
+; GFX9-NEXT:    v_readlane_b32 s34, v40, 2
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    s_mov_b32 s33, s34
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  call amdgpu_gfx void @external_gfx_void_func_void()
+  ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
+; GFX9-LABEL: test_gfx_call_external_void_func_i32_imm:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s34, s33
+; GFX9-NEXT:    s_mov_b32 s33, s32
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    v_writelane_b32 v40, s34, 2
+; GFX9-NEXT:    s_addk_i32 s32, 0x400
+; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
+; GFX9-NEXT:    s_getpc_b64 s[34:35]
+; GFX9-NEXT:    s_add_u32 s34, s34, external_gfx_void_func_i32@rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s35, s35, external_gfx_void_func_i32@rel32@hi+12
+; GFX9-NEXT:    v_mov_b32_e32 v0, 42
+; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
+; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
+; GFX9-NEXT:    s_mov_b32 s32, s33
+; GFX9-NEXT:    v_readlane_b32 s34, v40, 2
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    s_mov_b32 s33, s34
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  call amdgpu_gfx void @external_gfx_void_func_i32(i32 42)
+  ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 {
+; GFX9-LABEL: test_gfx_call_external_i32_func_void:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s34, s33
+; GFX9-NEXT:    s_mov_b32 s33, s32
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    v_writelane_b32 v40, s34, 2
+; GFX9-NEXT:    s_addk_i32 s32, 0x400
+; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
+; GFX9-NEXT:    s_getpc_b64 s[34:35]
+; GFX9-NEXT:    s_add_u32 s34, s34, external_gfx_i32_func_void@rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s35, s35, external_gfx_i32_func_void@rel32@hi+12
+; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT:    global_store_dword v[0:1], v0, off
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
+; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
+; GFX9-NEXT:    s_mov_b32 s32, s33
+; GFX9-NEXT:    v_readlane_b32 s34, v40, 2
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    s_mov_b32 s33, s34
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %val = call amdgpu_gfx i32 @external_gfx_i32_func_void()
+  store volatile i32 %val, ptr addrspace(1) poison
+  ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) #0 {
+; GFX9-LABEL: test_gfx_call_external_void_func_i32_imm_inreg:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s34, s33
+; GFX9-NEXT:    s_mov_b32 s33, s32
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    v_writelane_b32 v40, s34, 3
+; GFX9-NEXT:    s_addk_i32 s32, 0x400
+; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
+; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
+; GFX9-NEXT:    s_getpc_b64 s[34:35]
+; GFX9-NEXT:    s_add_u32 s34, s34, external_gfx_void_func_i32_inreg@rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s35, s35, external_gfx_void_func_i32_inreg@rel32@hi+12
+; GFX9-NEXT:    s_mov_b32 s4, 42
+; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
+; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
+; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
+; GFX9-NEXT:    s_mov_b32 s32, s33
+; GFX9-NEXT:    v_readlane_b32 s34, v40, 3
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    s_mov_b32 s33, s34
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42)
+  ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
+; GFX9-LABEL: test_gfx_call_external_void_func_struct_i8_i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s34, s33
+; GFX9-NEXT:    s_mov_b32 s33, s32
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    v_writelane_b32 v40, s34, 2
+; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_addk_i32 s32, 0x400
+; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_ubyte v0, v2, s[34:35]
+; GFX9-NEXT:    global_load_dword v1, v2, s[34:35] offset:4
+; GFX9-NEXT:    s_getpc_b64 s[34:35]
+; GFX9-NEXT:    s_add_u32 s34, s34, external_gfx_void_func_struct_i8_i32@rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s35, s35, external_gfx_void_func_struct_i8_i32@rel32@hi+12
+; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
+; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
+; GFX9-NEXT:    s_mov_b32 s32, s33
+; GFX9-NEXT:    v_readlane_b32 s34, v40, 2
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    s_mov_b32 s33, s34
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
+  %val = load { i8, i32 }, ptr addrspace(1) %ptr0
+  call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val)
+  ret void
+}
+
+define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #0 {
+; GFX9-LABEL: test_gfx_call_external_void_func_struct_i8_i32_inreg:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s34, s33
+; GFX9-NEXT:    s_mov_b32 s33, s32
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    v_writelane_b32 v40, s34, 4
+; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
+; GFX9-NEXT:    s_addk_i32 s32, 0x400
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_ubyte v1, v0, s[34:35]
+; GFX9-NEXT:    global_load_dword v2, v0, s[34:35] offset:4
+; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
+; GFX9-NEXT:    v_writelane_b32 v40, s30, 2
+; GFX9-NEXT:    s_getpc_b64 s[34:35]
+; GFX9-NEXT:    s_add_u32 s34, s34, external_gfx_void_func_struct_i8_i32_inreg@rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s35, s35, external_gfx_void_func_struct_i8_i32_inreg@rel32@hi+12
+; GFX9-NEXT:    v_writelane_b32 v40, s31, 3
+; GFX9-NEXT:    s_waitcnt vmcnt(1)
+; GFX9-NEXT:    v_readfirstlane_b32 s4, v1
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_readfirstlane_b32 s5, v2
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT:    v_readlane_b32 s31, v40, 3
+; GFX9-NEXT:    v_readlane_b32 s30, v40, 2
+; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
+; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
+; GFX9-NEXT:    s_mov_b32 s32, s33
+; GFX9-NEXT:    v_readlane_b32 s34, v40, 4
+; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
+; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
+; GFX9-NEXT:    s_mov_b32 s33, s34
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
+  %val = load { i8, i32 }, ptr addrspace(1) %ptr0
+  call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val)
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind noinline }

>From cfcf51805db915b643667df150d0c7b37bd03166 Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande <ChinmayDiwakar.Deshpande at amd.com>
Date: Tue, 4 Nov 2025 17:19:19 -0500
Subject: [PATCH 2/4] [NFC][AMDGPU][GISel] Add RUN lines to call tests

---
 llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll   |  398 --
 .../CodeGen/AMDGPU/call-args-inreg-bfloat.ll  |  127 +
 llvm/test/CodeGen/AMDGPU/call-args-inreg.ll   |  132 +-
 .../CodeGen/AMDGPU/call-argument-types.ll     | 3578 +++++++++++------
 llvm/test/CodeGen/AMDGPU/call-c-function.ll   |   61 +-
 llvm/test/CodeGen/AMDGPU/call-constexpr.ll    |  343 +-
 .../CodeGen/AMDGPU/call-defs-mode-register.ll |   95 +-
 llvm/test/CodeGen/AMDGPU/call-encoding.ll     |    6 +-
 .../AMDGPU/call-graph-register-usage.ll       |   15 +-
 .../AMDGPU/call-preserved-registers.ll        |   90 +-
 llvm/test/CodeGen/AMDGPU/call-return-types.ll |   11 +-
 llvm/test/CodeGen/AMDGPU/call-skip.ll         |  112 +-
 llvm/test/CodeGen/AMDGPU/call-waitcnt.ll      |  211 +-
 13 files changed, 3156 insertions(+), 2023 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/call-args-inreg-bfloat.ll

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll
deleted file mode 100644
index 054bc9df0ee60..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll
+++ /dev/null
@@ -1,398 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
-
-declare hidden void @external_void_func_void() #0
-declare hidden void @external_void_func_i32(i32) #0
-declare hidden void @external_void_func_i32_inreg(i32 inreg) #0
-declare hidden i32 @external_i32_func_void() #0
-
-declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0
-
-declare hidden amdgpu_gfx void @external_gfx_void_func_void() #0
-declare hidden amdgpu_gfx void @external_gfx_void_func_i32(i32) #0
-declare hidden amdgpu_gfx i32 @external_gfx_i32_func_void() #0
-declare hidden amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg) #0
-declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 }) #0
-declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg) #0
-
-define amdgpu_kernel void @test_call_external_void_func_void() #0 {
-; GFX9-LABEL: test_call_external_void_func_void:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
-; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
-; GFX9-NEXT:    s_add_u32 s0, s0, s17
-; GFX9-NEXT:    s_addc_u32 s1, s1, 0
-; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
-; GFX9-NEXT:    s_mov_b32 s13, s15
-; GFX9-NEXT:    s_mov_b32 s12, s14
-; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT:    s_getpc_b64 s[18:19]
-; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_void@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_void@rel32@hi+12
-; GFX9-NEXT:    s_mov_b32 s14, s16
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT:    s_endpgm
-  call void @external_void_func_void()
-  ret void
-}
-
-define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
-; GFX9-LABEL: test_call_external_void_func_i32_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
-; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
-; GFX9-NEXT:    s_add_u32 s0, s0, s17
-; GFX9-NEXT:    s_addc_u32 s1, s1, 0
-; GFX9-NEXT:    s_add_u32 s8, s8, 8
-; GFX9-NEXT:    s_addc_u32 s9, s9, 0
-; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
-; GFX9-NEXT:    s_mov_b32 s13, s15
-; GFX9-NEXT:    s_mov_b32 s12, s14
-; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT:    s_getpc_b64 s[18:19]
-; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_i32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_i32@rel32@hi+12
-; GFX9-NEXT:    v_mov_b32_e32 v0, 42
-; GFX9-NEXT:    s_mov_b32 s14, s16
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT:    s_endpgm
-  call void @external_void_func_i32(i32 42)
-  ret void
-}
-
-define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
-; GFX9-LABEL: test_call_external_i32_func_void:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
-; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
-; GFX9-NEXT:    s_add_u32 s0, s0, s17
-; GFX9-NEXT:    s_addc_u32 s1, s1, 0
-; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
-; GFX9-NEXT:    s_mov_b32 s13, s15
-; GFX9-NEXT:    s_mov_b32 s12, s14
-; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT:    s_getpc_b64 s[18:19]
-; GFX9-NEXT:    s_add_u32 s18, s18, external_i32_func_void@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s19, s19, external_i32_func_void@rel32@hi+12
-; GFX9-NEXT:    s_mov_b32 s14, s16
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT:    global_store_dword v[0:1], v0, off
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_endpgm
-  %val = call i32 @external_i32_func_void()
-  store volatile i32 %val, ptr addrspace(1) poison
-  ret void
-}
-
-define amdgpu_kernel void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_i32_inreg:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s17, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX9-NEXT:    v_writelane_b32 v40, s17, 2
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_getpc_b64 s[18:19]
-; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_i32_inreg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_i32_inreg@rel32@hi+12
-; GFX9-NEXT:    s_mov_b32 s0, s16
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX9-NEXT:    s_mov_b32 s33, s4
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-  call void @external_void_func_i32_inreg(i32 inreg %arg)
-  ret void
-}
-
-define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
-; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
-; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
-; GFX9-NEXT:    s_add_u32 s0, s0, s17
-; GFX9-NEXT:    s_addc_u32 s1, s1, 0
-; GFX9-NEXT:    v_mov_b32_e32 v3, 3
-; GFX9-NEXT:    buffer_store_byte v3, off, s[0:3], 0
-; GFX9-NEXT:    v_mov_b32_e32 v3, 8
-; GFX9-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:4
-; GFX9-NEXT:    buffer_load_dword v3, off, s[0:3], 0
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    buffer_load_dword v4, off, s[0:3], 0 offset:4
-; GFX9-NEXT:    s_add_u32 s8, s8, 8
-; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
-; GFX9-NEXT:    s_addc_u32 s9, s9, 0
-; GFX9-NEXT:    s_movk_i32 s32, 0x800
-; GFX9-NEXT:    s_mov_b32 s13, s15
-; GFX9-NEXT:    s_mov_b32 s12, s14
-; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT:    s_getpc_b64 s[18:19]
-; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
-; GFX9-NEXT:    v_mov_b32_e32 v0, 8
-; GFX9-NEXT:    s_mov_b32 s14, s16
-; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    buffer_store_dword v3, off, s[0:3], s32
-; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:4
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 offset:8
-; GFX9-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:12
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_store_byte v[0:1], v0, off
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    global_store_dword v[0:1], v1, off
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_endpgm
-  %in.val = alloca { i8, i32 }, align 4, addrspace(5)
-  %out.val = alloca { i8, i32 }, align 4, addrspace(5)
-  %in.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 0
-  %in.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 1
-  store i8 3, ptr addrspace(5) %in.gep0
-  store i32 8, ptr addrspace(5) %in.gep1
-  call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) %out.val, ptr addrspace(5) %in.val)
-  %out.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 0
-  %out.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 1
-  %out.val0 = load i8, ptr addrspace(5) %out.gep0
-  %out.val1 = load i32, ptr addrspace(5) %out.gep1
-  store volatile i8 %out.val0, ptr addrspace(1) poison
-  store volatile i32 %out.val1, ptr addrspace(1) poison
-  ret void
-}
-
-define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
-; GFX9-LABEL: test_gfx_call_external_void_func_void:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s34, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
-; GFX9-NEXT:    v_writelane_b32 v40, s34, 2
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_getpc_b64 s[34:35]
-; GFX9-NEXT:    s_add_u32 s34, s34, external_gfx_void_func_void@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s35, s35, external_gfx_void_func_void@rel32@hi+12
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s34, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
-; GFX9-NEXT:    s_mov_b32 s33, s34
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-  call amdgpu_gfx void @external_gfx_void_func_void()
-  ret void
-}
-
-define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
-; GFX9-LABEL: test_gfx_call_external_void_func_i32_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s34, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
-; GFX9-NEXT:    v_writelane_b32 v40, s34, 2
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_getpc_b64 s[34:35]
-; GFX9-NEXT:    s_add_u32 s34, s34, external_gfx_void_func_i32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s35, s35, external_gfx_void_func_i32@rel32@hi+12
-; GFX9-NEXT:    v_mov_b32_e32 v0, 42
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s34, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
-; GFX9-NEXT:    s_mov_b32 s33, s34
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-  call amdgpu_gfx void @external_gfx_void_func_i32(i32 42)
-  ret void
-}
-
-define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 {
-; GFX9-LABEL: test_gfx_call_external_i32_func_void:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s34, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
-; GFX9-NEXT:    v_writelane_b32 v40, s34, 2
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_getpc_b64 s[34:35]
-; GFX9-NEXT:    s_add_u32 s34, s34, external_gfx_i32_func_void@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s35, s35, external_gfx_i32_func_void@rel32@hi+12
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
-; GFX9-NEXT:    global_store_dword v[0:1], v0, off
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s34, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
-; GFX9-NEXT:    s_mov_b32 s33, s34
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-  %val = call amdgpu_gfx i32 @external_gfx_i32_func_void()
-  store volatile i32 %val, ptr addrspace(1) poison
-  ret void
-}
-
-define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) #0 {
-; GFX9-LABEL: test_gfx_call_external_void_func_i32_imm_inreg:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s34, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
-; GFX9-NEXT:    v_writelane_b32 v40, s34, 3
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
-; GFX9-NEXT:    s_getpc_b64 s[34:35]
-; GFX9-NEXT:    s_add_u32 s34, s34, external_gfx_void_func_i32_inreg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s35, s35, external_gfx_void_func_i32_inreg@rel32@hi+12
-; GFX9-NEXT:    s_mov_b32 s4, 42
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s34, v40, 3
-; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
-; GFX9-NEXT:    s_mov_b32 s33, s34
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-  call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42)
-  ret void
-}
-
-define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
-; GFX9-LABEL: test_gfx_call_external_void_func_struct_i8_i32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s34, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
-; GFX9-NEXT:    v_writelane_b32 v40, s34, 2
-; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_ubyte v0, v2, s[34:35]
-; GFX9-NEXT:    global_load_dword v1, v2, s[34:35] offset:4
-; GFX9-NEXT:    s_getpc_b64 s[34:35]
-; GFX9-NEXT:    s_add_u32 s34, s34, external_gfx_void_func_struct_i8_i32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s35, s35, external_gfx_void_func_struct_i8_i32@rel32@hi+12
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s34, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
-; GFX9-NEXT:    s_mov_b32 s33, s34
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-  %ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
-  %val = load { i8, i32 }, ptr addrspace(1) %ptr0
-  call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val)
-  ret void
-}
-
-define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #0 {
-; GFX9-LABEL: test_gfx_call_external_void_func_struct_i8_i32_inreg:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s34, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
-; GFX9-NEXT:    v_writelane_b32 v40, s34, 4
-; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_ubyte v1, v0, s[34:35]
-; GFX9-NEXT:    global_load_dword v2, v0, s[34:35] offset:4
-; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 2
-; GFX9-NEXT:    s_getpc_b64 s[34:35]
-; GFX9-NEXT:    s_add_u32 s34, s34, external_gfx_void_func_struct_i8_i32_inreg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s35, s35, external_gfx_void_func_struct_i8_i32_inreg@rel32@hi+12
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 3
-; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    v_readfirstlane_b32 s4, v1
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_readfirstlane_b32 s5, v2
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 3
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 2
-; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s34, v40, 4
-; GFX9-NEXT:    s_or_saveexec_b64 s[36:37], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[36:37]
-; GFX9-NEXT:    s_mov_b32 s33, s34
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-  %ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
-  %val = load { i8, i32 }, ptr addrspace(1) %ptr0
-  call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val)
-  ret void
-}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind noinline }
diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg-bfloat.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg-bfloat.ll
new file mode 100644
index 0000000000000..46dbb4d15b4ee
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg-bfloat.ll
@@ -0,0 +1,127 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s
+
+declare hidden void @external_void_func_bf16_inreg(bfloat inreg) #0
+declare hidden void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg) #0
+
+define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
+; GFX9-LABEL: test_call_external_void_func_bf16_inreg:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s17, s33
+; GFX9-NEXT:    s_mov_b32 s33, s32
+; GFX9-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_mov_b64 exec, s[18:19]
+; GFX9-NEXT:    v_writelane_b32 v40, s17, 2
+; GFX9-NEXT:    s_addk_i32 s32, 0x400
+; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
+; GFX9-NEXT:    s_getpc_b64 s[18:19]
+; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_bf16_inreg@rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_bf16_inreg@rel32@hi+12
+; GFX9-NEXT:    s_mov_b32 s0, s16
+; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
+; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
+; GFX9-NEXT:    s_mov_b32 s32, s33
+; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
+; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
+; GFX9-NEXT:    s_mov_b32 s33, s4
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: test_call_external_void_func_bf16_inreg:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_mov_b32 s1, s33
+; GFX11-NEXT:    s_mov_b32 s33, s32
+; GFX11-NEXT:    s_or_saveexec_b32 s2, -1
+; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
+; GFX11-NEXT:    s_mov_b32 exec_lo, s2
+; GFX11-NEXT:    v_writelane_b32 v40, s1, 2
+; GFX11-NEXT:    s_add_i32 s32, s32, 16
+; GFX11-NEXT:    s_getpc_b64 s[2:3]
+; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_bf16_inreg@rel32@lo+4
+; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_bf16_inreg@rel32@hi+12
+; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
+; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
+; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
+; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
+; GFX11-NEXT:    s_mov_b32 s32, s33
+; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
+; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
+; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
+; GFX11-NEXT:    s_mov_b32 exec_lo, s1
+; GFX11-NEXT:    s_mov_b32 s33, s0
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  call void @external_void_func_bf16_inreg(bfloat inreg %arg)
+  ret void
+}
+
+define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) #0 {
+; GFX9-LABEL: test_call_external_void_func_v2bf16_inreg:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s17, s33
+; GFX9-NEXT:    s_mov_b32 s33, s32
+; GFX9-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT:    s_mov_b64 exec, s[18:19]
+; GFX9-NEXT:    v_writelane_b32 v40, s17, 2
+; GFX9-NEXT:    s_addk_i32 s32, 0x400
+; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
+; GFX9-NEXT:    s_getpc_b64 s[18:19]
+; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_v2bf16_inreg@rel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_v2bf16_inreg@rel32@hi+12
+; GFX9-NEXT:    s_mov_b32 s0, s16
+; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
+; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
+; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
+; GFX9-NEXT:    s_mov_b32 s32, s33
+; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
+; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
+; GFX9-NEXT:    s_mov_b32 s33, s4
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: test_call_external_void_func_v2bf16_inreg:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_mov_b32 s1, s33
+; GFX11-NEXT:    s_mov_b32 s33, s32
+; GFX11-NEXT:    s_or_saveexec_b32 s2, -1
+; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
+; GFX11-NEXT:    s_mov_b32 exec_lo, s2
+; GFX11-NEXT:    v_writelane_b32 v40, s1, 2
+; GFX11-NEXT:    s_add_i32 s32, s32, 16
+; GFX11-NEXT:    s_getpc_b64 s[2:3]
+; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v2bf16_inreg@rel32@lo+4
+; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v2bf16_inreg@rel32@hi+12
+; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
+; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
+; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
+; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
+; GFX11-NEXT:    s_mov_b32 s32, s33
+; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
+; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
+; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
+; GFX11-NEXT:    s_mov_b32 exec_lo, s1
+; GFX11-NEXT:    s_mov_b32 s33, s0
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  call void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg)
+  ret void
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
index d1cede64ce71d..3d3a4041c89bb 100644
--- a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GISEL %s
 
 declare hidden void @external_void_func_i8_inreg(i8 inreg) #0
 declare hidden void @external_void_func_i16_inreg(i32 inreg) #0
@@ -12,11 +14,9 @@ declare hidden void @external_void_func_v4i32_inreg(<4 x i32> inreg) #0
 declare hidden void @external_void_func_v8i32_inreg(<8 x i32> inreg) #0
 declare hidden void @external_void_func_v16i32_inreg(<16 x i32> inreg) #0
 declare hidden void @external_void_func_f16_inreg(half inreg) #0
-declare hidden void @external_void_func_bf16_inreg(bfloat inreg) #0
 declare hidden void @external_void_func_f32_inreg(float inreg) #0
 declare hidden void @external_void_func_f64_inreg(double inreg) #0
 declare hidden void @external_void_func_v2f16_inreg(<2 x half> inreg) #0
-declare hidden void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg) #0
 declare hidden void @external_void_func_v3f16_inreg(<3 x half> inreg) #0
 declare hidden void @external_void_func_v4f16_inreg(<4 x half> inreg) #0
 
@@ -585,66 +585,6 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 {
   ret void
 }
 
-define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_bf16_inreg:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s17, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX9-NEXT:    v_writelane_b32 v40, s17, 2
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_getpc_b64 s[18:19]
-; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_bf16_inreg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_bf16_inreg@rel32@hi+12
-; GFX9-NEXT:    s_mov_b32 s0, s16
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX9-NEXT:    s_mov_b32 s33, s4
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: test_call_external_void_func_bf16_inreg:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s1, s33
-; GFX11-NEXT:    s_mov_b32 s33, s32
-; GFX11-NEXT:    s_or_saveexec_b32 s2, -1
-; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
-; GFX11-NEXT:    s_mov_b32 exec_lo, s2
-; GFX11-NEXT:    v_writelane_b32 v40, s1, 2
-; GFX11-NEXT:    s_add_i32 s32, s32, 16
-; GFX11-NEXT:    s_getpc_b64 s[2:3]
-; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_bf16_inreg@rel32@lo+4
-; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_bf16_inreg@rel32@hi+12
-; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX11-NEXT:    s_mov_b32 s32, s33
-; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
-; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
-; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
-; GFX11-NEXT:    s_mov_b32 exec_lo, s1
-; GFX11-NEXT:    s_mov_b32 s33, s0
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-  call void @external_void_func_bf16_inreg(bfloat inreg %arg)
-  ret void
-}
-
 define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 {
 ; GFX9-LABEL: test_call_external_void_func_f32_inreg:
 ; GFX9:       ; %bb.0:
@@ -826,67 +766,6 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0
   ret void
 }
 
-
-define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_v2bf16_inreg:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s17, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[18:19]
-; GFX9-NEXT:    v_writelane_b32 v40, s17, 2
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_getpc_b64 s[18:19]
-; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_v2bf16_inreg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_v2bf16_inreg@rel32@hi+12
-; GFX9-NEXT:    s_mov_b32 s0, s16
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX9-NEXT:    s_mov_b32 s33, s4
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: test_call_external_void_func_v2bf16_inreg:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s1, s33
-; GFX11-NEXT:    s_mov_b32 s33, s32
-; GFX11-NEXT:    s_or_saveexec_b32 s2, -1
-; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
-; GFX11-NEXT:    s_mov_b32 exec_lo, s2
-; GFX11-NEXT:    v_writelane_b32 v40, s1, 2
-; GFX11-NEXT:    s_add_i32 s32, s32, 16
-; GFX11-NEXT:    s_getpc_b64 s[2:3]
-; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v2bf16_inreg@rel32@lo+4
-; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v2bf16_inreg@rel32@hi+12
-; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX11-NEXT:    s_mov_b32 s32, s33
-; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
-; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
-; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
-; GFX11-NEXT:    s_mov_b32 exec_lo, s1
-; GFX11-NEXT:    s_mov_b32 s33, s0
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-  call void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg)
-  ret void
-}
-
 define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0 {
 ; GFX9-LABEL: test_call_external_void_func_v3f16_inreg:
 ; GFX9:       ; %bb.0:
@@ -1529,3 +1408,6 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inre
 attributes #0 = { nounwind }
 attributes #1 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" }
 
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GISEL: {{.*}}
+; SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
index 832e43f1e1973..1bc334f27f0fa 100644
--- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
@@ -1,10 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=CI %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn -global-isel=0 -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,SDAG %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE16 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE16 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=HSA %s
+; RUN: llc -mtriple=amdgcn -global-isel=1 -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GISEL %s
 
 declare hidden void @external_void_func_i1(i1) #0
 declare hidden void @external_void_func_i1_signext(i1 signext) #0
@@ -100,24 +101,24 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_i1_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 1
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i1_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 1
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_i1_imm:
 ; GFX11:       ; %bb.0:
@@ -145,6 +146,25 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i1_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 1
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_i1(i1 true)
   ret void
 }
@@ -196,28 +216,28 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_i1_signext:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s3, 0xf000
-; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s5
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    v_bfe_i32 v0, v0, 0, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i1_signext:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SDAG-NEXT:    s_mov_b32 s2, -1
+; SDAG-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 glc
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s5
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 1
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_i1_signext:
 ; GFX11:       ; %bb.0:
@@ -253,6 +273,29 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
 ; HSA-NEXT:    v_bfe_i32 v0, v0, 0, 1
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i1_signext:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s2, -1
+; GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; GISEL-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 glc
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s5
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 1
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %var = load volatile i1, ptr addrspace(1) poison
   call void @external_void_func_i1_signext(i1 signext %var)
   ret void
@@ -306,28 +349,28 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_i1_zeroext:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s3, 0xf000
-; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s5
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i1_zeroext:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SDAG-NEXT:    s_mov_b32 s2, -1
+; SDAG-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 glc
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s5
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_i1_zeroext:
 ; GFX11:       ; %bb.0:
@@ -363,6 +406,29 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
 ; HSA-NEXT:    v_and_b32_e32 v0, 1, v0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i1_zeroext:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s2, -1
+; GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; GISEL-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 glc
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s5
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %var = load volatile i1, ptr addrspace(1) poison
   call void @external_void_func_i1_zeroext(i1 zeroext %var)
   ret void
@@ -407,24 +473,24 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_i8_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s5
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i8_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s5
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0x7b
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-TRUE16-LABEL: test_call_external_void_func_i8_imm:
 ; GFX11-TRUE16:       ; %bb.0:
@@ -463,6 +529,25 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i8_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s5
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_i8(i8 123)
   ret void
 }
@@ -513,27 +598,27 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_i8_signext:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s3, 0xf000
-; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    buffer_load_sbyte v0, off, s[0:3], 0 glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s5
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i8_signext:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SDAG-NEXT:    s_mov_b32 s2, -1
+; SDAG-NEXT:    buffer_load_sbyte v0, off, s[0:3], 0 glc
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s5
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_i8_signext:
 ; GFX11:       ; %bb.0:
@@ -567,6 +652,28 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i8_signext:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s2, -1
+; GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; GISEL-NEXT:    buffer_load_sbyte v0, off, s[0:3], 0 glc
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s5
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %var = load volatile i8, ptr addrspace(1) poison
   call void @external_void_func_i8_signext(i8 signext %var)
   ret void
@@ -617,27 +724,27 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_i8_zeroext:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s3, 0xf000
-; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s5
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i8_zeroext:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SDAG-NEXT:    s_mov_b32 s2, -1
+; SDAG-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 glc
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s5
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_i8_zeroext:
 ; GFX11:       ; %bb.0:
@@ -671,6 +778,28 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i8_zeroext:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s2, -1
+; GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; GISEL-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 glc
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s5
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %var = load volatile i8, ptr addrspace(1) poison
   call void @external_void_func_i8_zeroext(i8 zeroext %var)
   ret void
@@ -715,24 +844,24 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_i16_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i16_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0x7b
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-TRUE16-LABEL: test_call_external_void_func_i16_imm:
 ; GFX11-TRUE16:       ; %bb.0:
@@ -771,6 +900,25 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i16_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_i16(i16 123)
   ret void
 }
@@ -820,27 +968,27 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_i16_signext:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s3, 0xf000
-; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    buffer_load_sshort v0, off, s[0:3], 0 glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s5
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i16_signext:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SDAG-NEXT:    s_mov_b32 s2, -1
+; SDAG-NEXT:    buffer_load_sshort v0, off, s[0:3], 0 glc
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s5
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_i16_signext:
 ; GFX11:       ; %bb.0:
@@ -874,6 +1022,28 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i16_signext:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s2, -1
+; GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; GISEL-NEXT:    buffer_load_sshort v0, off, s[0:3], 0 glc
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s5
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %var = load volatile i16, ptr addrspace(1) poison
   call void @external_void_func_i16_signext(i16 signext %var)
   ret void
@@ -924,27 +1094,27 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_i16_zeroext:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s3, 0xf000
-; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    buffer_load_ushort v0, off, s[0:3], 0 glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s5
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i16_zeroext:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SDAG-NEXT:    s_mov_b32 s2, -1
+; SDAG-NEXT:    buffer_load_ushort v0, off, s[0:3], 0 glc
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s5
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_i16_zeroext:
 ; GFX11:       ; %bb.0:
@@ -978,6 +1148,28 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i16_zeroext:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s2, -1
+; GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; GISEL-NEXT:    buffer_load_ushort v0, off, s[0:3], 0 glc
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s5
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %var = load volatile i16, ptr addrspace(1) poison
   call void @external_void_func_i16_zeroext(i16 zeroext %var)
   ret void
@@ -1022,24 +1214,24 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_i32_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s5
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 42
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i32_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s5
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 42
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_i32_imm:
 ; GFX11:       ; %bb.0:
@@ -1067,6 +1259,25 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i32_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s5
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 42
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_i32(i32 42)
   ret void
 }
@@ -1112,25 +1323,25 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_i64_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX9-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_i64_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0x7b
+; SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_i64_imm:
 ; GFX11:       ; %bb.0:
@@ -1159,6 +1370,26 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_i64_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_i64(i64 123)
   ret void
 }
@@ -1208,27 +1439,27 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v2i64:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], 0
-; GFX9-NEXT:    s_mov_b32 s3, 0xf000
-; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v2i64:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], 0
+; SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SDAG-NEXT:    s_mov_b32 s2, -1
+; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v2i64:
 ; GFX11:       ; %bb.0:
@@ -1262,6 +1493,31 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v2i64:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], 0
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GISEL-NEXT:    v_mov_b32_e32 v3, s3
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %val = load <2 x i64>, ptr addrspace(1) null
   call void @external_void_func_v2i64(<2 x i64> %val)
   ret void
@@ -1312,27 +1568,27 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v2i64_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 1
-; GFX9-NEXT:    v_mov_b32_e32 v1, 2
-; GFX9-NEXT:    v_mov_b32_e32 v2, 3
-; GFX9-NEXT:    v_mov_b32_e32 v3, 4
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v2i64_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 1
+; SDAG-NEXT:    v_mov_b32_e32 v1, 2
+; SDAG-NEXT:    v_mov_b32_e32 v2, 3
+; SDAG-NEXT:    v_mov_b32_e32 v3, 4
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v2i64_imm:
 ; GFX11:       ; %bb.0:
@@ -1364,6 +1620,28 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v2i64_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 1
+; GISEL-NEXT:    v_mov_b32_e32 v1, 2
+; GISEL-NEXT:    v_mov_b32_e32 v2, 3
+; GISEL-NEXT:    v_mov_b32_e32 v3, 4
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>)
   ret void
 }
@@ -1417,29 +1695,29 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v3i64:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], 0
-; GFX9-NEXT:    s_mov_b32 s3, 0xf000
-; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v4, 1
-; GFX9-NEXT:    v_mov_b32_e32 v5, 2
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v3i64:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], 0
+; SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SDAG-NEXT:    s_mov_b32 s2, -1
+; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v4, 1
+; SDAG-NEXT:    v_mov_b32_e32 v5, 2
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v3i64:
 ; GFX11:       ; %bb.0:
@@ -1476,6 +1754,33 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v3i64:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], 0
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v4, 1
+; GISEL-NEXT:    v_mov_b32_e32 v5, 2
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GISEL-NEXT:    v_mov_b32_e32 v3, s3
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %load = load <2 x i64>, ptr addrspace(1) null
   %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 poison>, <3 x i32> <i32 0, i32 1, i32 2>
 
@@ -1536,31 +1841,31 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v4i64:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], 0
-; GFX9-NEXT:    s_mov_b32 s3, 0xf000
-; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v4, 1
-; GFX9-NEXT:    v_mov_b32_e32 v5, 2
-; GFX9-NEXT:    v_mov_b32_e32 v6, 3
-; GFX9-NEXT:    v_mov_b32_e32 v7, 4
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v4i64:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], 0
+; SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SDAG-NEXT:    s_mov_b32 s2, -1
+; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v4, 1
+; SDAG-NEXT:    v_mov_b32_e32 v5, 2
+; SDAG-NEXT:    v_mov_b32_e32 v6, 3
+; SDAG-NEXT:    v_mov_b32_e32 v7, 4
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v4i64:
 ; GFX11:       ; %bb.0:
@@ -1600,6 +1905,35 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v4i64:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], 0
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v4, 1
+; GISEL-NEXT:    v_mov_b32_e32 v5, 2
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GISEL-NEXT:    v_mov_b32_e32 v3, s3
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    v_mov_b32_e32 v6, 3
+; GISEL-NEXT:    v_mov_b32_e32 v7, 4
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %load = load <2 x i64>, ptr addrspace(1) null
   %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   call void @external_void_func_v4i64(<4 x i64> %val)
@@ -1645,24 +1979,24 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_f16_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0x4400
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_f16_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0x4400
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-TRUE16-LABEL: test_call_external_void_func_f16_imm:
 ; GFX11-TRUE16:       ; %bb.0:
@@ -1701,6 +2035,25 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_f16_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x4400
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_f16(half 4.0)
   ret void
 }
@@ -1744,24 +2097,24 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_f32_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 4.0
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_f32_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 4.0
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_f32_imm:
 ; GFX11:       ; %bb.0:
@@ -1789,6 +2142,25 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_f32_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 4.0
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_f32(float 4.0)
   ret void
 }
@@ -1834,25 +2206,25 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v2f32_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 1.0
-; GFX9-NEXT:    v_mov_b32_e32 v1, 2.0
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v2f32_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 1.0
+; SDAG-NEXT:    v_mov_b32_e32 v1, 2.0
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v2f32_imm:
 ; GFX11:       ; %bb.0:
@@ -1881,6 +2253,26 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v2f32_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 1.0
+; GISEL-NEXT:    v_mov_b32_e32 v1, 2.0
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>)
   ret void
 }
@@ -1928,26 +2320,26 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v3f32_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 1.0
-; GFX9-NEXT:    v_mov_b32_e32 v1, 2.0
-; GFX9-NEXT:    v_mov_b32_e32 v2, 4.0
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v3f32_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 1.0
+; SDAG-NEXT:    v_mov_b32_e32 v1, 2.0
+; SDAG-NEXT:    v_mov_b32_e32 v2, 4.0
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v3f32_imm:
 ; GFX11:       ; %bb.0:
@@ -1978,6 +2370,27 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v3f32_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 1.0
+; GISEL-NEXT:    v_mov_b32_e32 v1, 2.0
+; GISEL-NEXT:    v_mov_b32_e32 v2, 4.0
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 4.0>)
   ret void
 }
@@ -2029,28 +2442,28 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v5f32_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 1.0
-; GFX9-NEXT:    v_mov_b32_e32 v1, 2.0
-; GFX9-NEXT:    v_mov_b32_e32 v2, 4.0
-; GFX9-NEXT:    v_mov_b32_e32 v3, -1.0
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0.5
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v5f32_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 1.0
+; SDAG-NEXT:    v_mov_b32_e32 v1, 2.0
+; SDAG-NEXT:    v_mov_b32_e32 v2, 4.0
+; SDAG-NEXT:    v_mov_b32_e32 v3, -1.0
+; SDAG-NEXT:    v_mov_b32_e32 v4, 0.5
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v5f32_imm:
 ; GFX11:       ; %bb.0:
@@ -2084,6 +2497,29 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v5f32_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 1.0
+; GISEL-NEXT:    v_mov_b32_e32 v1, 2.0
+; GISEL-NEXT:    v_mov_b32_e32 v2, 4.0
+; GISEL-NEXT:    v_mov_b32_e32 v3, -1.0
+; GISEL-NEXT:    v_mov_b32_e32 v4, 0.5
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
   ret void
 }
@@ -2129,25 +2565,25 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_f64_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9-NEXT:    v_mov_b32_e32 v1, 0x40100000
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_f64_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-NEXT:    v_mov_b32_e32 v1, 0x40100000
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_f64_imm:
 ; GFX11:       ; %bb.0:
@@ -2176,6 +2612,26 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_f64_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0x40100000
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_f64(double 4.0)
   ret void
 }
@@ -2225,27 +2681,27 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v2f64_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9-NEXT:    v_mov_b32_e32 v1, 2.0
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x40100000
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v2f64_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-NEXT:    v_mov_b32_e32 v1, 2.0
+; SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG-NEXT:    v_mov_b32_e32 v3, 0x40100000
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v2f64_imm:
 ; GFX11:       ; %bb.0:
@@ -2277,6 +2733,28 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v2f64_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GISEL-NEXT:    v_mov_b32_e32 v1, 2.0
+; GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GISEL-NEXT:    v_mov_b32_e32 v3, 0x40100000
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>)
   ret void
 }
@@ -2330,29 +2808,29 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v3f64_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9-NEXT:    v_mov_b32_e32 v1, 2.0
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x40100000
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0
-; GFX9-NEXT:    v_mov_b32_e32 v5, 0x40200000
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v3f64_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-NEXT:    v_mov_b32_e32 v1, 2.0
+; SDAG-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG-NEXT:    v_mov_b32_e32 v3, 0x40100000
+; SDAG-NEXT:    v_mov_b32_e32 v4, 0
+; SDAG-NEXT:    v_mov_b32_e32 v5, 0x40200000
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v3f64_imm:
 ; GFX11:       ; %bb.0:
@@ -2387,6 +2865,30 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v3f64_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GISEL-NEXT:    v_mov_b32_e32 v1, 2.0
+; GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GISEL-NEXT:    v_mov_b32_e32 v3, 0x40100000
+; GISEL-NEXT:    v_mov_b32_e32 v4, 0
+; GISEL-NEXT:    v_mov_b32_e32 v5, 0x40200000
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>)
   ret void
 }
@@ -2436,26 +2938,26 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v2i16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_mov_b32 s3, 0xf000
-; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    buffer_load_dword v0, off, s[0:3], 0
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v2i16:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SDAG-NEXT:    s_mov_b32 s2, -1
+; SDAG-NEXT:    buffer_load_dword v0, off, s[0:3], 0
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v2i16:
 ; GFX11:       ; %bb.0:
@@ -2487,6 +2989,27 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v2i16:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_load_dword s8, s[0:1], 0x0
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v0, s8
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %val = load <2 x i16>, ptr addrspace(1) poison
   call void @external_void_func_v2i16(<2 x i16> %val)
   ret void
@@ -2539,26 +3062,26 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v3i16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_mov_b32 s3, 0xf000
-; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v3i16:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SDAG-NEXT:    s_mov_b32 s2, -1
+; SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v3i16:
 ; GFX11:       ; %bb.0:
@@ -2590,6 +3113,28 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v3i16:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %val = load <3 x i16>, ptr addrspace(1) poison
   call void @external_void_func_v3i16(<3 x i16> %val)
   ret void
@@ -2643,26 +3188,26 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v3f16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_mov_b32 s3, 0xf000
-; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v3f16:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SDAG-NEXT:    s_mov_b32 s2, -1
+; SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v3f16:
 ; GFX11:       ; %bb.0:
@@ -2694,6 +3239,28 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v3f16:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %val = load <3 x half>, ptr addrspace(1) poison
   call void @external_void_func_v3f16(<3 x half> %val)
   ret void
@@ -2741,25 +3308,25 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v3i16_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0x20001
-; GFX9-NEXT:    v_mov_b32_e32 v1, 3
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v3i16_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0x20001
+; SDAG-NEXT:    v_mov_b32_e32 v1, 3
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v3i16_imm:
 ; GFX11:       ; %bb.0:
@@ -2788,6 +3355,26 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v3i16_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x20001
+; GISEL-NEXT:    v_mov_b32_e32 v1, 3
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>)
   ret void
 }
@@ -2834,25 +3421,25 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v3f16_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0x40003c00
-; GFX9-NEXT:    v_mov_b32_e32 v1, 0x4400
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v3f16_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0x40003c00
+; SDAG-NEXT:    v_mov_b32_e32 v1, 0x4400
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v3f16_imm:
 ; GFX11:       ; %bb.0:
@@ -2882,6 +3469,26 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v3f16_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x40003c00
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0x4400
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_v3f16(<3 x half> <half 1.0, half 2.0, half 4.0>)
   ret void
 }
@@ -2934,26 +3541,26 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v4i16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_mov_b32 s3, 0xf000
-; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v4i16:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SDAG-NEXT:    s_mov_b32 s2, -1
+; SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v4i16:
 ; GFX11:       ; %bb.0:
@@ -2985,6 +3592,28 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v4i16:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %val = load <4 x i16>, ptr addrspace(1) poison
   call void @external_void_func_v4i16(<4 x i16> %val)
   ret void
@@ -3033,25 +3662,25 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v4i16_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0x20001
-; GFX9-NEXT:    v_mov_b32_e32 v1, 0x40003
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v4i16_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0x20001
+; SDAG-NEXT:    v_mov_b32_e32 v1, 0x40003
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v4i16_imm:
 ; GFX11:       ; %bb.0:
@@ -3081,6 +3710,26 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v4i16_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0x20001
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0x40003
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>)
   ret void
 }
@@ -3132,26 +3781,26 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v2f16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_mov_b32 s3, 0xf000
-; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    buffer_load_dword v0, off, s[0:3], 0
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v2f16:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SDAG-NEXT:    s_mov_b32 s2, -1
+; SDAG-NEXT:    buffer_load_dword v0, off, s[0:3], 0
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v2f16:
 ; GFX11:       ; %bb.0:
@@ -3183,6 +3832,27 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v2f16:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_load_dword s8, s[0:1], 0x0
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v0, s8
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %val = load <2 x half>, ptr addrspace(1) poison
   call void @external_void_func_v2f16(<2 x half> %val)
   ret void
@@ -3231,26 +3901,26 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v2i32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_mov_b32 s3, 0xf000
-; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v2i32:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SDAG-NEXT:    s_mov_b32 s2, -1
+; SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v2i32:
 ; GFX11:       ; %bb.0:
@@ -3282,6 +3952,28 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v2i32:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %val = load <2 x i32>, ptr addrspace(1) poison
   call void @external_void_func_v2i32(<2 x i32> %val)
   ret void
@@ -3328,25 +4020,25 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v2i32_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 1
-; GFX9-NEXT:    v_mov_b32_e32 v1, 2
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v2i32_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 1
+; SDAG-NEXT:    v_mov_b32_e32 v1, 2
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v2i32_imm:
 ; GFX11:       ; %bb.0:
@@ -3375,6 +4067,26 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v2i32_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 1
+; GISEL-NEXT:    v_mov_b32_e32 v1, 2
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>)
   ret void
 }
@@ -3422,26 +4134,26 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v3i32_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s5
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 3
-; GFX9-NEXT:    v_mov_b32_e32 v1, 4
-; GFX9-NEXT:    v_mov_b32_e32 v2, 5
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v3i32_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s5
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 3
+; SDAG-NEXT:    v_mov_b32_e32 v1, 4
+; SDAG-NEXT:    v_mov_b32_e32 v2, 5
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v3i32_imm:
 ; GFX11:       ; %bb.0:
@@ -3472,6 +4184,27 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v3i32_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s5
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 3
+; GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GISEL-NEXT:    v_mov_b32_e32 v2, 5
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>)
   ret void
 }
@@ -3521,27 +4254,27 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v3i32_i32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s5
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 3
-; GFX9-NEXT:    v_mov_b32_e32 v1, 4
-; GFX9-NEXT:    v_mov_b32_e32 v2, 5
-; GFX9-NEXT:    v_mov_b32_e32 v3, 6
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v3i32_i32:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s5
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 3
+; SDAG-NEXT:    v_mov_b32_e32 v1, 4
+; SDAG-NEXT:    v_mov_b32_e32 v2, 5
+; SDAG-NEXT:    v_mov_b32_e32 v3, 6
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v3i32_i32:
 ; GFX11:       ; %bb.0:
@@ -3573,6 +4306,28 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v3i32_i32:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s5
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 3
+; GISEL-NEXT:    v_mov_b32_e32 v1, 4
+; GISEL-NEXT:    v_mov_b32_e32 v2, 5
+; GISEL-NEXT:    v_mov_b32_e32 v3, 6
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6)
   ret void
 }
@@ -3620,26 +4375,26 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v4i32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_mov_b32 s3, 0xf000
-; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v4i32:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SDAG-NEXT:    s_mov_b32 s2, -1
+; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v4i32:
 ; GFX11:       ; %bb.0:
@@ -3671,6 +4426,30 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v4i32:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GISEL-NEXT:    v_mov_b32_e32 v3, s3
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %val = load <4 x i32>, ptr addrspace(1) poison
   call void @external_void_func_v4i32(<4 x i32> %val)
   ret void
@@ -3721,27 +4500,27 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v4i32_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 1
-; GFX9-NEXT:    v_mov_b32_e32 v1, 2
-; GFX9-NEXT:    v_mov_b32_e32 v2, 3
-; GFX9-NEXT:    v_mov_b32_e32 v3, 4
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v4i32_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 1
+; SDAG-NEXT:    v_mov_b32_e32 v1, 2
+; SDAG-NEXT:    v_mov_b32_e32 v2, 3
+; SDAG-NEXT:    v_mov_b32_e32 v3, 4
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v4i32_imm:
 ; GFX11:       ; %bb.0:
@@ -3773,6 +4552,28 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v4i32_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 1
+; GISEL-NEXT:    v_mov_b32_e32 v1, 2
+; GISEL-NEXT:    v_mov_b32_e32 v2, 3
+; GISEL-NEXT:    v_mov_b32_e32 v3, 4
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
   ret void
 }
@@ -3824,28 +4625,28 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v5i32_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 1
-; GFX9-NEXT:    v_mov_b32_e32 v1, 2
-; GFX9-NEXT:    v_mov_b32_e32 v2, 3
-; GFX9-NEXT:    v_mov_b32_e32 v3, 4
-; GFX9-NEXT:    v_mov_b32_e32 v4, 5
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v5i32_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 1
+; SDAG-NEXT:    v_mov_b32_e32 v1, 2
+; SDAG-NEXT:    v_mov_b32_e32 v2, 3
+; SDAG-NEXT:    v_mov_b32_e32 v3, 4
+; SDAG-NEXT:    v_mov_b32_e32 v4, 5
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v5i32_imm:
 ; GFX11:       ; %bb.0:
@@ -3879,6 +4680,29 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v5i32_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 1
+; GISEL-NEXT:    v_mov_b32_e32 v1, 2
+; GISEL-NEXT:    v_mov_b32_e32 v2, 3
+; GISEL-NEXT:    v_mov_b32_e32 v3, 4
+; GISEL-NEXT:    v_mov_b32_e32 v4, 5
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>)
   ret void
 }
@@ -3932,29 +4756,29 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v8i32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_mov_b32 s3, 0xf000
-; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v8i32:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SDAG-NEXT:    s_mov_b32 s2, -1
+; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+; SDAG-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v8i32:
 ; GFX11:       ; %bb.0:
@@ -3993,6 +4817,36 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v8i32:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    s_load_dwordx8 s[8:15], s[0:1], 0x0
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v0, s8
+; GISEL-NEXT:    v_mov_b32_e32 v1, s9
+; GISEL-NEXT:    v_mov_b32_e32 v2, s10
+; GISEL-NEXT:    v_mov_b32_e32 v3, s11
+; GISEL-NEXT:    v_mov_b32_e32 v4, s12
+; GISEL-NEXT:    v_mov_b32_e32 v5, s13
+; GISEL-NEXT:    v_mov_b32_e32 v6, s14
+; GISEL-NEXT:    v_mov_b32_e32 v7, s15
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %ptr = load ptr addrspace(1), ptr addrspace(4) poison
   %val = load <8 x i32>, ptr addrspace(1) %ptr
   call void @external_void_func_v8i32(<8 x i32> %val)
@@ -4052,31 +4906,31 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v8i32_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 1
-; GFX9-NEXT:    v_mov_b32_e32 v1, 2
-; GFX9-NEXT:    v_mov_b32_e32 v2, 3
-; GFX9-NEXT:    v_mov_b32_e32 v3, 4
-; GFX9-NEXT:    v_mov_b32_e32 v4, 5
-; GFX9-NEXT:    v_mov_b32_e32 v5, 6
-; GFX9-NEXT:    v_mov_b32_e32 v6, 7
-; GFX9-NEXT:    v_mov_b32_e32 v7, 8
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v8i32_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 1
+; SDAG-NEXT:    v_mov_b32_e32 v1, 2
+; SDAG-NEXT:    v_mov_b32_e32 v2, 3
+; SDAG-NEXT:    v_mov_b32_e32 v3, 4
+; SDAG-NEXT:    v_mov_b32_e32 v4, 5
+; SDAG-NEXT:    v_mov_b32_e32 v5, 6
+; SDAG-NEXT:    v_mov_b32_e32 v6, 7
+; SDAG-NEXT:    v_mov_b32_e32 v7, 8
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v8i32_imm:
 ; GFX11:       ; %bb.0:
@@ -4114,6 +4968,32 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v8i32_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 1
+; GISEL-NEXT:    v_mov_b32_e32 v1, 2
+; GISEL-NEXT:    v_mov_b32_e32 v2, 3
+; GISEL-NEXT:    v_mov_b32_e32 v3, 4
+; GISEL-NEXT:    v_mov_b32_e32 v4, 5
+; GISEL-NEXT:    v_mov_b32_e32 v5, 6
+; GISEL-NEXT:    v_mov_b32_e32 v6, 7
+; GISEL-NEXT:    v_mov_b32_e32 v7, 8
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   call void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
   ret void
 }
@@ -4171,31 +5051,31 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v16i32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_mov_b32 s3, 0xf000
-; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
-; GFX9-NEXT:    buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32
-; GFX9-NEXT:    buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v16i32:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SDAG-NEXT:    s_mov_b32 s2, -1
+; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+; SDAG-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
+; SDAG-NEXT:    buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32
+; SDAG-NEXT:    buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v16i32:
 ; GFX11:       ; %bb.0:
@@ -4238,6 +5118,44 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v16i32:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    s_load_dwordx16 s[8:23], s[0:1], 0x0
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v0, s8
+; GISEL-NEXT:    v_mov_b32_e32 v1, s9
+; GISEL-NEXT:    v_mov_b32_e32 v2, s10
+; GISEL-NEXT:    v_mov_b32_e32 v3, s11
+; GISEL-NEXT:    v_mov_b32_e32 v4, s12
+; GISEL-NEXT:    v_mov_b32_e32 v5, s13
+; GISEL-NEXT:    v_mov_b32_e32 v6, s14
+; GISEL-NEXT:    v_mov_b32_e32 v7, s15
+; GISEL-NEXT:    v_mov_b32_e32 v8, s16
+; GISEL-NEXT:    v_mov_b32_e32 v9, s17
+; GISEL-NEXT:    v_mov_b32_e32 v10, s18
+; GISEL-NEXT:    v_mov_b32_e32 v11, s19
+; GISEL-NEXT:    v_mov_b32_e32 v12, s20
+; GISEL-NEXT:    v_mov_b32_e32 v13, s21
+; GISEL-NEXT:    v_mov_b32_e32 v14, s22
+; GISEL-NEXT:    v_mov_b32_e32 v15, s23
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %ptr = load ptr addrspace(1), ptr addrspace(4) poison
   %val = load <16 x i32>, ptr addrspace(1) %ptr
   call void @external_void_func_v16i32(<16 x i32> %val)
@@ -4245,155 +5163,6 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
 }
 
 define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
-; VI-LABEL: test_call_external_void_func_v32i32:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
-; VI-NEXT:    s_mov_b32 s7, 0xf000
-; VI-NEXT:    s_mov_b32 s6, -1
-; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
-; VI-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
-; VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
-; VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
-; VI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
-; VI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
-; VI-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
-; VI-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
-; VI-NEXT:    s_mov_b32 s38, -1
-; VI-NEXT:    s_mov_b32 s39, 0xe80000
-; VI-NEXT:    s_add_u32 s36, s36, s3
-; VI-NEXT:    s_addc_u32 s37, s37, 0
-; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; VI-NEXT:    s_mov_b32 s32, 0
-; VI-NEXT:    s_getpc_b64 s[8:9]
-; VI-NEXT:    s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
-; VI-NEXT:    s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
-; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; VI-NEXT:    s_waitcnt vmcnt(6)
-; VI-NEXT:    buffer_store_dword v31, off, s[36:39], s32
-; VI-NEXT:    s_swappc_b64 s[30:31], s[8:9]
-; VI-NEXT:    s_endpgm
-;
-; CI-LABEL: test_call_external_void_func_v32i32:
-; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
-; CI-NEXT:    s_mov_b32 s7, 0xf000
-; CI-NEXT:    s_mov_b32 s6, -1
-; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
-; CI-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
-; CI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
-; CI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
-; CI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
-; CI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
-; CI-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
-; CI-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
-; CI-NEXT:    s_mov_b32 s38, -1
-; CI-NEXT:    s_mov_b32 s39, 0xe8f000
-; CI-NEXT:    s_add_u32 s36, s36, s3
-; CI-NEXT:    s_addc_u32 s37, s37, 0
-; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; CI-NEXT:    s_mov_b32 s32, 0
-; CI-NEXT:    s_getpc_b64 s[8:9]
-; CI-NEXT:    s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
-; CI-NEXT:    s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
-; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; CI-NEXT:    s_waitcnt vmcnt(6)
-; CI-NEXT:    buffer_store_dword v31, off, s[36:39], s32
-; CI-NEXT:    s_swappc_b64 s[30:31], s[8:9]
-; CI-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_call_external_void_func_v32i32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX9-NEXT:    s_mov_b32 s7, 0xf000
-; GFX9-NEXT:    s_mov_b32 s6, -1
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
-; GFX9-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
-; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
-; GFX9-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
-; GFX9-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
-; GFX9-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
-; GFX9-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
-; GFX9-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_getpc_b64 s[8:9]
-; GFX9-NEXT:    s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_waitcnt vmcnt(6)
-; GFX9-NEXT:    buffer_store_dword v31, off, s[36:39], s32
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[8:9]
-; GFX9-NEXT:    s_endpgm
-;
-; GFX11-LABEL: test_call_external_void_func_v32i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
-; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s6, -1
-; GFX11-NEXT:    s_mov_b32 s32, 0
-; GFX11-NEXT:    s_getpc_b64 s[2:3]
-; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v32i32@rel32@lo+4
-; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v32i32@rel32@hi+12
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_clause 0x7
-; GFX11-NEXT:    buffer_load_b128 v[28:31], off, s[4:7], 0 offset:112
-; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[4:7], 0
-; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16
-; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[4:7], 0 offset:32
-; GFX11-NEXT:    buffer_load_b128 v[12:15], off, s[4:7], 0 offset:48
-; GFX11-NEXT:    buffer_load_b128 v[16:19], off, s[4:7], 0 offset:64
-; GFX11-NEXT:    buffer_load_b128 v[20:23], off, s[4:7], 0 offset:80
-; GFX11-NEXT:    buffer_load_b128 v[24:27], off, s[4:7], 0 offset:96
-; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX11-NEXT:    s_waitcnt vmcnt(7)
-; GFX11-NEXT:    scratch_store_b32 off, v31, s32
-; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
-; GFX11-NEXT:    s_endpgm
-;
-; HSA-LABEL: test_call_external_void_func_v32i32:
-; HSA:       ; %bb.0:
-; HSA-NEXT:    s_add_i32 s6, s6, s9
-; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
-; HSA-NEXT:    s_add_u32 s0, s0, s9
-; HSA-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
-; HSA-NEXT:    s_mov_b32 s11, 0x1100f000
-; HSA-NEXT:    s_mov_b32 s10, -1
-; HSA-NEXT:    s_addc_u32 s1, s1, 0
-; HSA-NEXT:    s_waitcnt lgkmcnt(0)
-; HSA-NEXT:    buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:112
-; HSA-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
-; HSA-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
-; HSA-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
-; HSA-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
-; HSA-NEXT:    buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:64
-; HSA-NEXT:    buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:80
-; HSA-NEXT:    buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:96
-; HSA-NEXT:    s_mov_b32 s32, 0
-; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
-; HSA-NEXT:    s_getpc_b64 s[12:13]
-; HSA-NEXT:    s_add_u32 s12, s12, external_void_func_v32i32@rel32@lo+4
-; HSA-NEXT:    s_addc_u32 s13, s13, external_void_func_v32i32@rel32@hi+12
-; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; HSA-NEXT:    s_waitcnt vmcnt(7)
-; HSA-NEXT:    buffer_store_dword v31, off, s[0:3], s32
-; HSA-NEXT:    s_swappc_b64 s[30:31], s[12:13]
-; HSA-NEXT:    s_endpgm
   %ptr = load ptr addrspace(1), ptr addrspace(4) poison
   %val = load <32 x i32>, ptr addrspace(1) %ptr
   call void @external_void_func_v32i32(<32 x i32> %val)
@@ -4471,40 +5240,40 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v32i32_i32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s5
-; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX9-NEXT:    s_mov_b32 s7, 0xf000
-; GFX9-NEXT:    s_mov_b32 s6, -1
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    buffer_load_dword v32, off, s[4:7], 0
-; GFX9-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
-; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
-; GFX9-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
-; GFX9-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
-; GFX9-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
-; GFX9-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
-; GFX9-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
-; GFX9-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_waitcnt vmcnt(8)
-; GFX9-NEXT:    buffer_store_dword v32, off, s[36:39], s32 offset:4
-; GFX9-NEXT:    s_waitcnt vmcnt(8)
-; GFX9-NEXT:    buffer_store_dword v31, off, s[36:39], s32
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v32i32_i32:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s5
+; SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
+; SDAG-NEXT:    s_mov_b32 s7, 0xf000
+; SDAG-NEXT:    s_mov_b32 s6, -1
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-NEXT:    buffer_load_dword v32, off, s[4:7], 0
+; SDAG-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
+; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
+; SDAG-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
+; SDAG-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
+; SDAG-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
+; SDAG-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
+; SDAG-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
+; SDAG-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_waitcnt vmcnt(8)
+; SDAG-NEXT:    buffer_store_dword v32, off, s[36:39], s32 offset:4
+; SDAG-NEXT:    s_waitcnt vmcnt(8)
+; SDAG-NEXT:    buffer_store_dword v31, off, s[36:39], s32
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v32i32_i32:
 ; GFX11:       ; %bb.0:
@@ -4566,6 +5335,67 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
 ; HSA-NEXT:    buffer_store_dword v31, off, s[0:3], s32
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v32i32_i32:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GISEL-NEXT:    s_mov_b32 s52, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s53, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s54, -1
+; GISEL-NEXT:    s_mov_b32 s55, 0xe00000
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GISEL-NEXT:    s_load_dwordx16 s[8:23], s[0:1], 0x40
+; GISEL-NEXT:    s_load_dwordx16 s[36:51], s[0:1], 0x0
+; GISEL-NEXT:    s_add_u32 s52, s52, s5
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_addc_u32 s53, s53, 0
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GISEL-NEXT:    ; kill: killed $sgpr0_sgpr1
+; GISEL-NEXT:    ; kill: killed $sgpr0_sgpr1
+; GISEL-NEXT:    buffer_store_dword v0, off, s[52:55], s32 offset:4
+; GISEL-NEXT:    v_mov_b32_e32 v0, s23
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[52:53]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
+; GISEL-NEXT:    buffer_store_dword v0, off, s[52:55], s32
+; GISEL-NEXT:    v_mov_b32_e32 v0, s36
+; GISEL-NEXT:    v_mov_b32_e32 v1, s37
+; GISEL-NEXT:    v_mov_b32_e32 v2, s38
+; GISEL-NEXT:    v_mov_b32_e32 v3, s39
+; GISEL-NEXT:    v_mov_b32_e32 v4, s40
+; GISEL-NEXT:    v_mov_b32_e32 v5, s41
+; GISEL-NEXT:    v_mov_b32_e32 v6, s42
+; GISEL-NEXT:    v_mov_b32_e32 v7, s43
+; GISEL-NEXT:    v_mov_b32_e32 v8, s44
+; GISEL-NEXT:    v_mov_b32_e32 v9, s45
+; GISEL-NEXT:    v_mov_b32_e32 v10, s46
+; GISEL-NEXT:    v_mov_b32_e32 v11, s47
+; GISEL-NEXT:    v_mov_b32_e32 v12, s48
+; GISEL-NEXT:    v_mov_b32_e32 v13, s49
+; GISEL-NEXT:    v_mov_b32_e32 v14, s50
+; GISEL-NEXT:    v_mov_b32_e32 v15, s51
+; GISEL-NEXT:    v_mov_b32_e32 v16, s8
+; GISEL-NEXT:    v_mov_b32_e32 v17, s9
+; GISEL-NEXT:    v_mov_b32_e32 v18, s10
+; GISEL-NEXT:    v_mov_b32_e32 v19, s11
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[54:55]
+; GISEL-NEXT:    v_mov_b32_e32 v20, s12
+; GISEL-NEXT:    v_mov_b32_e32 v21, s13
+; GISEL-NEXT:    v_mov_b32_e32 v22, s14
+; GISEL-NEXT:    v_mov_b32_e32 v23, s15
+; GISEL-NEXT:    v_mov_b32_e32 v24, s16
+; GISEL-NEXT:    v_mov_b32_e32 v25, s17
+; GISEL-NEXT:    v_mov_b32_e32 v26, s18
+; GISEL-NEXT:    v_mov_b32_e32 v27, s19
+; GISEL-NEXT:    v_mov_b32_e32 v28, s20
+; GISEL-NEXT:    v_mov_b32_e32 v29, s21
+; GISEL-NEXT:    v_mov_b32_e32 v30, s22
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
   %val0 = load <32 x i32>, ptr addrspace(1) %ptr0
   %val1 = load i32, ptr addrspace(1) poison
@@ -4622,29 +5452,29 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1)
 ; CI-NEXT:    s_waitcnt vmcnt(0)
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_i32_func_i32_imm:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s50, -1
-; GFX9-NEXT:    s_mov_b32 s51, 0xe00000
-; GFX9-NEXT:    s_add_u32 s48, s48, s5
-; GFX9-NEXT:    s_load_dwordx2 s[36:37], s[2:3], 0x24
-; GFX9-NEXT:    s_addc_u32 s49, s49, 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT:    v_mov_b32_e32 v0, 42
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_mov_b32 s39, 0xf000
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    buffer_store_dword v0, off, s[36:39], 0
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_i32_func_i32_imm:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s50, -1
+; SDAG-NEXT:    s_mov_b32 s51, 0xe00000
+; SDAG-NEXT:    s_add_u32 s48, s48, s5
+; SDAG-NEXT:    s_load_dwordx2 s[36:37], s[2:3], 0x24
+; SDAG-NEXT:    s_addc_u32 s49, s49, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[48:49]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[50:51]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 42
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_mov_b32 s39, 0xf000
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    buffer_store_dword v0, off, s[36:39], 0
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_i32_func_i32_imm:
 ; GFX11:       ; %bb.0:
@@ -4682,6 +5512,30 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1)
 ; HSA-NEXT:    buffer_store_dword v0, off, s[36:39], 0
 ; HSA-NEXT:    s_waitcnt vmcnt(0)
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_i32_func_i32_imm:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s50, -1
+; GISEL-NEXT:    s_mov_b32 s51, 0xe00000
+; GISEL-NEXT:    s_add_u32 s48, s48, s5
+; GISEL-NEXT:    s_load_dwordx2 s[36:37], s[2:3], 0x24
+; GISEL-NEXT:    s_addc_u32 s49, s49, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[48:49]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 42
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[50:51]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xf000
+; GISEL-NEXT:    buffer_store_dword v0, off, s[36:39], 0
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    s_endpgm
   %val = call i32 @external_i32_func_i32(i32 42)
   store volatile i32 %val, ptr addrspace(1) %out
   ret void
@@ -4736,29 +5590,29 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_struct_i8_i32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_mov_b32 s3, 0xf000
-; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
-; GFX9-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:4
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_struct_i8_i32:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SDAG-NEXT:    s_mov_b32 s2, -1
+; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
+; SDAG-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:4
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_struct_i8_i32:
 ; GFX11:       ; %bb.0:
@@ -4797,6 +5651,30 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
 ; HSA-NEXT:    s_mov_b32 s32, 0
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_struct_i8_i32:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[8:9]
+; GISEL-NEXT:    s_add_u32 s8, s8, external_void_func_struct_i8_i32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s9, s9, external_void_func_struct_i8_i32@rel32@hi+12
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GISEL-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; GISEL-NEXT:    s_endpgm
   %ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
   %val = load { i8, i32 }, ptr addrspace(1) %ptr0
   call void @external_void_func_struct_i8_i32({ i8, i32 } %val)
@@ -4860,34 +5738,34 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_byval_struct_i8_i32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    v_mov_b32_e32 v0, 3
-; GFX9-NEXT:    buffer_store_byte v0, off, s[36:39], 0
-; GFX9-NEXT:    v_mov_b32_e32 v0, 8
-; GFX9-NEXT:    buffer_store_dword v0, off, s[36:39], 0 offset:4
-; GFX9-NEXT:    buffer_load_dword v0, off, s[36:39], 0 offset:4
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    buffer_load_dword v1, off, s[36:39], 0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_movk_i32 s32, 0x400
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    buffer_store_dword v0, off, s[36:39], s32 offset:4
-; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    buffer_store_dword v1, off, s[36:39], s32
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_byval_struct_i8_i32:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    v_mov_b32_e32 v0, 3
+; SDAG-NEXT:    buffer_store_byte v0, off, s[36:39], 0
+; SDAG-NEXT:    v_mov_b32_e32 v0, 8
+; SDAG-NEXT:    buffer_store_dword v0, off, s[36:39], 0 offset:4
+; SDAG-NEXT:    buffer_load_dword v0, off, s[36:39], 0 offset:4
+; SDAG-NEXT:    s_nop 0
+; SDAG-NEXT:    buffer_load_dword v1, off, s[36:39], 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_movk_i32 s32, 0x400
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_waitcnt vmcnt(1)
+; SDAG-NEXT:    buffer_store_dword v0, off, s[36:39], s32 offset:4
+; SDAG-NEXT:    s_waitcnt vmcnt(1)
+; SDAG-NEXT:    buffer_store_dword v1, off, s[36:39], s32
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-TRUE16-LABEL: test_call_external_void_func_byval_struct_i8_i32:
 ; GFX11-TRUE16:       ; %bb.0:
@@ -4948,6 +5826,35 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
 ; HSA-NEXT:    buffer_store_dword v1, off, s[0:3], s32
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_byval_struct_i8_i32:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    v_mov_b32_e32 v0, 3
+; GISEL-NEXT:    buffer_store_byte v0, off, s[36:39], 0
+; GISEL-NEXT:    v_mov_b32_e32 v0, 8
+; GISEL-NEXT:    buffer_store_dword v0, off, s[36:39], 0 offset:4
+; GISEL-NEXT:    buffer_load_dword v0, off, s[36:39], 0
+; GISEL-NEXT:    s_nop 0
+; GISEL-NEXT:    buffer_load_dword v1, off, s[36:39], 0 offset:4
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_movk_i32 s32, 0x400
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_waitcnt vmcnt(1)
+; GISEL-NEXT:    buffer_store_dword v0, off, s[36:39], s32
+; GISEL-NEXT:    s_waitcnt vmcnt(1)
+; GISEL-NEXT:    buffer_store_dword v1, off, s[36:39], s32 offset:4
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %val = alloca { i8, i32 }, align 8, addrspace(5)
   %gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 0
   %gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 1
@@ -5034,44 +5941,44 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
 ; CI-NEXT:    s_waitcnt vmcnt(0)
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s5
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    v_mov_b32_e32 v0, 3
-; GFX9-NEXT:    buffer_store_byte v0, off, s[36:39], 0
-; GFX9-NEXT:    v_mov_b32_e32 v0, 8
-; GFX9-NEXT:    buffer_store_dword v0, off, s[36:39], 0 offset:4
-; GFX9-NEXT:    buffer_load_dword v0, off, s[36:39], 0 offset:4
-; GFX9-NEXT:    s_nop 0
-; GFX9-NEXT:    buffer_load_dword v1, off, s[36:39], 0
-; GFX9-NEXT:    s_movk_i32 s32, 0x800
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    buffer_store_dword v0, off, s[36:39], s32 offset:4
-; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    buffer_store_dword v1, off, s[36:39], s32
-; GFX9-NEXT:    v_mov_b32_e32 v0, 8
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    buffer_load_ubyte v0, off, s[36:39], 0 offset:8
-; GFX9-NEXT:    buffer_load_dword v1, off, s[36:39], 0 offset:12
-; GFX9-NEXT:    s_mov_b32 s3, 0xf000
-; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    buffer_store_byte v0, off, s[0:3], 0
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    buffer_store_dword v1, off, s[0:3], 0
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s5
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    v_mov_b32_e32 v0, 3
+; SDAG-NEXT:    buffer_store_byte v0, off, s[36:39], 0
+; SDAG-NEXT:    v_mov_b32_e32 v0, 8
+; SDAG-NEXT:    buffer_store_dword v0, off, s[36:39], 0 offset:4
+; SDAG-NEXT:    buffer_load_dword v0, off, s[36:39], 0 offset:4
+; SDAG-NEXT:    s_nop 0
+; SDAG-NEXT:    buffer_load_dword v1, off, s[36:39], 0
+; SDAG-NEXT:    s_movk_i32 s32, 0x800
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_waitcnt vmcnt(1)
+; SDAG-NEXT:    buffer_store_dword v0, off, s[36:39], s32 offset:4
+; SDAG-NEXT:    s_waitcnt vmcnt(1)
+; SDAG-NEXT:    buffer_store_dword v1, off, s[36:39], s32
+; SDAG-NEXT:    v_mov_b32_e32 v0, 8
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    buffer_load_ubyte v0, off, s[36:39], 0 offset:8
+; SDAG-NEXT:    buffer_load_dword v1, off, s[36:39], 0 offset:12
+; SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SDAG-NEXT:    s_mov_b32 s2, -1
+; SDAG-NEXT:    s_waitcnt vmcnt(1)
+; SDAG-NEXT:    buffer_store_byte v0, off, s[0:3], 0
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:    buffer_store_dword v1, off, s[0:3], 0
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-TRUE16-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
 ; GFX11-TRUE16:       ; %bb.0:
@@ -5170,6 +6077,45 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
 ; HSA-NEXT:    buffer_store_dword v1, off, s[4:7], 0
 ; HSA-NEXT:    s_waitcnt vmcnt(0)
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s5
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    v_mov_b32_e32 v0, 3
+; GISEL-NEXT:    buffer_store_byte v0, off, s[36:39], 0
+; GISEL-NEXT:    v_mov_b32_e32 v0, 8
+; GISEL-NEXT:    buffer_store_dword v0, off, s[36:39], 0 offset:4
+; GISEL-NEXT:    buffer_load_dword v0, off, s[36:39], 0
+; GISEL-NEXT:    s_nop 0
+; GISEL-NEXT:    buffer_load_dword v1, off, s[36:39], 0 offset:4
+; GISEL-NEXT:    s_movk_i32 s32, 0x800
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_waitcnt vmcnt(1)
+; GISEL-NEXT:    buffer_store_dword v0, off, s[36:39], s32
+; GISEL-NEXT:    s_waitcnt vmcnt(1)
+; GISEL-NEXT:    buffer_store_dword v1, off, s[36:39], s32 offset:4
+; GISEL-NEXT:    v_mov_b32_e32 v0, 8
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    buffer_load_ubyte v0, off, s[36:39], 0 offset:8
+; GISEL-NEXT:    buffer_load_dword v1, off, s[36:39], 0 offset:12
+; GISEL-NEXT:    s_mov_b32 s2, -1
+; GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; GISEL-NEXT:    s_waitcnt vmcnt(1)
+; GISEL-NEXT:    buffer_store_byte v0, off, s[0:3], 0
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    buffer_store_dword v1, off, s[0:3], 0
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    s_endpgm
   %in.val = alloca { i8, i32 }, align 8, addrspace(5)
   %out.val = alloca { i8, i32 }, align 8, addrspace(5)
   %in.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 0
@@ -5272,47 +6218,47 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: test_call_external_void_func_v16i8:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s38, -1
-; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
-; GFX9-NEXT:    s_add_u32 s36, s36, s3
-; GFX9-NEXT:    s_mov_b32 s3, 0xf000
-; GFX9-NEXT:    s_mov_b32 s2, -1
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_lshrrev_b32_e32 v16, 8, v0
-; GFX9-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
-; GFX9-NEXT:    v_lshrrev_b32_e32 v18, 24, v0
-; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
-; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
-; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
-; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
-; GFX9-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
-; GFX9-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
-; GFX9-NEXT:    v_lshrrev_b32_e32 v13, 8, v3
-; GFX9-NEXT:    v_lshrrev_b32_e32 v14, 16, v3
-; GFX9-NEXT:    v_lshrrev_b32_e32 v15, 24, v3
-; GFX9-NEXT:    v_mov_b32_e32 v4, v1
-; GFX9-NEXT:    v_mov_b32_e32 v8, v2
-; GFX9-NEXT:    v_mov_b32_e32 v12, v3
-; GFX9-NEXT:    v_mov_b32_e32 v1, v16
-; GFX9-NEXT:    v_mov_b32_e32 v2, v17
-; GFX9-NEXT:    v_mov_b32_e32 v3, v18
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: test_call_external_void_func_v16i8:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_mov_b32 s3, 0xf000
+; SDAG-NEXT:    s_mov_b32 s2, -1
+; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:    v_lshrrev_b32_e32 v16, 8, v0
+; SDAG-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
+; SDAG-NEXT:    v_lshrrev_b32_e32 v18, 24, v0
+; SDAG-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
+; SDAG-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
+; SDAG-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
+; SDAG-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
+; SDAG-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
+; SDAG-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
+; SDAG-NEXT:    v_lshrrev_b32_e32 v13, 8, v3
+; SDAG-NEXT:    v_lshrrev_b32_e32 v14, 16, v3
+; SDAG-NEXT:    v_lshrrev_b32_e32 v15, 24, v3
+; SDAG-NEXT:    v_mov_b32_e32 v4, v1
+; SDAG-NEXT:    v_mov_b32_e32 v8, v2
+; SDAG-NEXT:    v_mov_b32_e32 v12, v3
+; SDAG-NEXT:    v_mov_b32_e32 v1, v16
+; SDAG-NEXT:    v_mov_b32_e32 v2, v17
+; SDAG-NEXT:    v_mov_b32_e32 v3, v18
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_call_external_void_func_v16i8:
 ; GFX11:       ; %bb.0:
@@ -5384,6 +6330,56 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
 ; HSA-NEXT:    v_mov_b32_e32 v3, v18
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v16i8:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GISEL-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s38, -1
+; GISEL-NEXT:    s_mov_b32 s39, 0xe00000
+; GISEL-NEXT:    s_add_u32 s36, s36, s3
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
+; GISEL-NEXT:    s_addc_u32 s37, s37, 0
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    s_lshr_b32 s8, s0, 8
+; GISEL-NEXT:    s_lshr_b32 s9, s0, 16
+; GISEL-NEXT:    s_lshr_b32 s10, s0, 24
+; GISEL-NEXT:    s_lshr_b32 s11, s1, 8
+; GISEL-NEXT:    s_lshr_b32 s12, s1, 16
+; GISEL-NEXT:    s_lshr_b32 s13, s1, 24
+; GISEL-NEXT:    s_lshr_b32 s14, s2, 8
+; GISEL-NEXT:    s_lshr_b32 s15, s2, 16
+; GISEL-NEXT:    s_lshr_b32 s16, s2, 24
+; GISEL-NEXT:    s_lshr_b32 s17, s3, 8
+; GISEL-NEXT:    s_lshr_b32 s18, s3, 16
+; GISEL-NEXT:    s_lshr_b32 s19, s3, 24
+; GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-NEXT:    v_mov_b32_e32 v4, s1
+; GISEL-NEXT:    v_mov_b32_e32 v8, s2
+; GISEL-NEXT:    v_mov_b32_e32 v12, s3
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v1, s8
+; GISEL-NEXT:    v_mov_b32_e32 v2, s9
+; GISEL-NEXT:    v_mov_b32_e32 v3, s10
+; GISEL-NEXT:    v_mov_b32_e32 v5, s11
+; GISEL-NEXT:    v_mov_b32_e32 v6, s12
+; GISEL-NEXT:    v_mov_b32_e32 v7, s13
+; GISEL-NEXT:    v_mov_b32_e32 v9, s14
+; GISEL-NEXT:    v_mov_b32_e32 v10, s15
+; GISEL-NEXT:    v_mov_b32_e32 v11, s16
+; GISEL-NEXT:    v_mov_b32_e32 v13, s17
+; GISEL-NEXT:    v_mov_b32_e32 v14, s18
+; GISEL-NEXT:    v_mov_b32_e32 v15, s19
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %ptr = load ptr addrspace(1), ptr addrspace(4) poison
   %val = load <16 x i8>, ptr addrspace(1) %ptr
   call void @external_void_func_v16i8(<16 x i8> %val)
@@ -5509,64 +6505,64 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
 ; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; CI-NEXT:    s_endpgm
 ;
-; GFX9-LABEL: stack_passed_arg_alignment_v32i32_f64:
-; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_mov_b32 s52, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s53, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s54, -1
-; GFX9-NEXT:    s_mov_b32 s55, 0xe00000
-; GFX9-NEXT:    s_add_u32 s52, s52, s5
-; GFX9-NEXT:    s_load_dwordx16 s[8:23], s[2:3], 0x64
-; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0xa4
-; GFX9-NEXT:    s_load_dwordx16 s[36:51], s[2:3], 0x24
-; GFX9-NEXT:    s_mov_b32 s32, 0
-; GFX9-NEXT:    s_addc_u32 s53, s53, 0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v0, s23
-; GFX9-NEXT:    buffer_store_dword v0, off, s[52:55], s32
-; GFX9-NEXT:    v_mov_b32_e32 v0, s4
-; GFX9-NEXT:    buffer_store_dword v0, off, s[52:55], s32 offset:4
-; GFX9-NEXT:    v_mov_b32_e32 v0, s5
-; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[52:53]
-; GFX9-NEXT:    buffer_store_dword v0, off, s[52:55], s32 offset:8
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[54:55]
-; GFX9-NEXT:    v_mov_b32_e32 v0, s36
-; GFX9-NEXT:    v_mov_b32_e32 v1, s37
-; GFX9-NEXT:    v_mov_b32_e32 v2, s38
-; GFX9-NEXT:    v_mov_b32_e32 v3, s39
-; GFX9-NEXT:    v_mov_b32_e32 v4, s40
-; GFX9-NEXT:    v_mov_b32_e32 v5, s41
-; GFX9-NEXT:    v_mov_b32_e32 v6, s42
-; GFX9-NEXT:    v_mov_b32_e32 v7, s43
-; GFX9-NEXT:    v_mov_b32_e32 v8, s44
-; GFX9-NEXT:    v_mov_b32_e32 v9, s45
-; GFX9-NEXT:    v_mov_b32_e32 v10, s46
-; GFX9-NEXT:    v_mov_b32_e32 v11, s47
-; GFX9-NEXT:    v_mov_b32_e32 v12, s48
-; GFX9-NEXT:    v_mov_b32_e32 v13, s49
-; GFX9-NEXT:    v_mov_b32_e32 v14, s50
-; GFX9-NEXT:    v_mov_b32_e32 v15, s51
-; GFX9-NEXT:    v_mov_b32_e32 v16, s8
-; GFX9-NEXT:    v_mov_b32_e32 v17, s9
-; GFX9-NEXT:    v_mov_b32_e32 v18, s10
-; GFX9-NEXT:    v_mov_b32_e32 v19, s11
-; GFX9-NEXT:    v_mov_b32_e32 v20, s12
-; GFX9-NEXT:    v_mov_b32_e32 v21, s13
-; GFX9-NEXT:    v_mov_b32_e32 v22, s14
-; GFX9-NEXT:    v_mov_b32_e32 v23, s15
-; GFX9-NEXT:    v_mov_b32_e32 v24, s16
-; GFX9-NEXT:    v_mov_b32_e32 v25, s17
-; GFX9-NEXT:    v_mov_b32_e32 v26, s18
-; GFX9-NEXT:    v_mov_b32_e32 v27, s19
-; GFX9-NEXT:    v_mov_b32_e32 v28, s20
-; GFX9-NEXT:    v_mov_b32_e32 v29, s21
-; GFX9-NEXT:    v_mov_b32_e32 v30, s22
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT:    s_endpgm
+; SDAG-LABEL: stack_passed_arg_alignment_v32i32_f64:
+; SDAG:       ; %bb.0: ; %entry
+; SDAG-NEXT:    s_mov_b32 s52, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s53, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_mov_b32 s54, -1
+; SDAG-NEXT:    s_mov_b32 s55, 0xe00000
+; SDAG-NEXT:    s_add_u32 s52, s52, s5
+; SDAG-NEXT:    s_load_dwordx16 s[8:23], s[2:3], 0x64
+; SDAG-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0xa4
+; SDAG-NEXT:    s_load_dwordx16 s[36:51], s[2:3], 0x24
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_addc_u32 s53, s53, 0
+; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-NEXT:    v_mov_b32_e32 v0, s23
+; SDAG-NEXT:    buffer_store_dword v0, off, s[52:55], s32
+; SDAG-NEXT:    v_mov_b32_e32 v0, s4
+; SDAG-NEXT:    buffer_store_dword v0, off, s[52:55], s32 offset:4
+; SDAG-NEXT:    v_mov_b32_e32 v0, s5
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[52:53]
+; SDAG-NEXT:    buffer_store_dword v0, off, s[52:55], s32 offset:8
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[54:55]
+; SDAG-NEXT:    v_mov_b32_e32 v0, s36
+; SDAG-NEXT:    v_mov_b32_e32 v1, s37
+; SDAG-NEXT:    v_mov_b32_e32 v2, s38
+; SDAG-NEXT:    v_mov_b32_e32 v3, s39
+; SDAG-NEXT:    v_mov_b32_e32 v4, s40
+; SDAG-NEXT:    v_mov_b32_e32 v5, s41
+; SDAG-NEXT:    v_mov_b32_e32 v6, s42
+; SDAG-NEXT:    v_mov_b32_e32 v7, s43
+; SDAG-NEXT:    v_mov_b32_e32 v8, s44
+; SDAG-NEXT:    v_mov_b32_e32 v9, s45
+; SDAG-NEXT:    v_mov_b32_e32 v10, s46
+; SDAG-NEXT:    v_mov_b32_e32 v11, s47
+; SDAG-NEXT:    v_mov_b32_e32 v12, s48
+; SDAG-NEXT:    v_mov_b32_e32 v13, s49
+; SDAG-NEXT:    v_mov_b32_e32 v14, s50
+; SDAG-NEXT:    v_mov_b32_e32 v15, s51
+; SDAG-NEXT:    v_mov_b32_e32 v16, s8
+; SDAG-NEXT:    v_mov_b32_e32 v17, s9
+; SDAG-NEXT:    v_mov_b32_e32 v18, s10
+; SDAG-NEXT:    v_mov_b32_e32 v19, s11
+; SDAG-NEXT:    v_mov_b32_e32 v20, s12
+; SDAG-NEXT:    v_mov_b32_e32 v21, s13
+; SDAG-NEXT:    v_mov_b32_e32 v22, s14
+; SDAG-NEXT:    v_mov_b32_e32 v23, s15
+; SDAG-NEXT:    v_mov_b32_e32 v24, s16
+; SDAG-NEXT:    v_mov_b32_e32 v25, s17
+; SDAG-NEXT:    v_mov_b32_e32 v26, s18
+; SDAG-NEXT:    v_mov_b32_e32 v27, s19
+; SDAG-NEXT:    v_mov_b32_e32 v28, s20
+; SDAG-NEXT:    v_mov_b32_e32 v29, s21
+; SDAG-NEXT:    v_mov_b32_e32 v30, s22
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SDAG-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: stack_passed_arg_alignment_v32i32_f64:
 ; GFX11:       ; %bb.0: ; %entry
@@ -5662,6 +6658,65 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
 ; HSA-NEXT:    v_mov_b32_e32 v30, s22
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[24:25]
 ; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: stack_passed_arg_alignment_v32i32_f64:
+; GISEL:       ; %bb.0: ; %entry
+; GISEL-NEXT:    s_mov_b32 s52, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s53, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s54, -1
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_load_dwordx16 s[8:23], s[2:3], 0x64
+; GISEL-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0xa4
+; GISEL-NEXT:    s_load_dwordx16 s[36:51], s[2:3], 0x24
+; GISEL-NEXT:    s_mov_b32 s55, 0xe00000
+; GISEL-NEXT:    s_add_u32 s52, s52, s5
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_addc_u32 s53, s53, 0
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v0, s23
+; GISEL-NEXT:    buffer_store_dword v0, off, s[52:55], s32
+; GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-NEXT:    buffer_store_dword v0, off, s[52:55], s32 offset:4
+; GISEL-NEXT:    v_mov_b32_e32 v0, s1
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[52:53]
+; GISEL-NEXT:    buffer_store_dword v0, off, s[52:55], s32 offset:8
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, s36
+; GISEL-NEXT:    v_mov_b32_e32 v1, s37
+; GISEL-NEXT:    v_mov_b32_e32 v2, s38
+; GISEL-NEXT:    v_mov_b32_e32 v3, s39
+; GISEL-NEXT:    v_mov_b32_e32 v4, s40
+; GISEL-NEXT:    v_mov_b32_e32 v5, s41
+; GISEL-NEXT:    v_mov_b32_e32 v6, s42
+; GISEL-NEXT:    v_mov_b32_e32 v7, s43
+; GISEL-NEXT:    v_mov_b32_e32 v8, s44
+; GISEL-NEXT:    v_mov_b32_e32 v9, s45
+; GISEL-NEXT:    v_mov_b32_e32 v10, s46
+; GISEL-NEXT:    v_mov_b32_e32 v11, s47
+; GISEL-NEXT:    v_mov_b32_e32 v12, s48
+; GISEL-NEXT:    v_mov_b32_e32 v13, s49
+; GISEL-NEXT:    v_mov_b32_e32 v14, s50
+; GISEL-NEXT:    v_mov_b32_e32 v15, s51
+; GISEL-NEXT:    v_mov_b32_e32 v16, s8
+; GISEL-NEXT:    v_mov_b32_e32 v17, s9
+; GISEL-NEXT:    v_mov_b32_e32 v18, s10
+; GISEL-NEXT:    v_mov_b32_e32 v19, s11
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[54:55]
+; GISEL-NEXT:    v_mov_b32_e32 v20, s12
+; GISEL-NEXT:    v_mov_b32_e32 v21, s13
+; GISEL-NEXT:    v_mov_b32_e32 v22, s14
+; GISEL-NEXT:    v_mov_b32_e32 v23, s15
+; GISEL-NEXT:    v_mov_b32_e32 v24, s16
+; GISEL-NEXT:    v_mov_b32_e32 v25, s17
+; GISEL-NEXT:    v_mov_b32_e32 v26, s18
+; GISEL-NEXT:    v_mov_b32_e32 v27, s19
+; GISEL-NEXT:    v_mov_b32_e32 v28, s20
+; GISEL-NEXT:    v_mov_b32_e32 v29, s21
+; GISEL-NEXT:    v_mov_b32_e32 v30, s22
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
 entry:
   call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
   ret void
@@ -5702,22 +6757,22 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
 ; CI-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:16
 ; CI-NEXT:    s_setpc_b64 s[4:5]
 ;
-; GFX9-LABEL: tail_call_byval_align16:
-; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:28
-; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32
-; GFX9-NEXT:    s_getpc_b64 s[4:5]
-; GFX9-NEXT:    s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
-; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:20
-; GFX9-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:24
-; GFX9-NEXT:    s_waitcnt vmcnt(2)
-; GFX9-NEXT:    buffer_store_dword v32, off, s[0:3], s32
-; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:16
-; GFX9-NEXT:    s_setpc_b64 s[4:5]
+; SDAG-LABEL: tail_call_byval_align16:
+; SDAG:       ; %bb.0: ; %entry
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:28
+; SDAG-NEXT:    buffer_load_dword v32, off, s[0:3], s32
+; SDAG-NEXT:    s_getpc_b64 s[4:5]
+; SDAG-NEXT:    s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
+; SDAG-NEXT:    s_waitcnt vmcnt(1)
+; SDAG-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:20
+; SDAG-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:24
+; SDAG-NEXT:    s_waitcnt vmcnt(2)
+; SDAG-NEXT:    buffer_store_dword v32, off, s[0:3], s32
+; SDAG-NEXT:    s_waitcnt vmcnt(1)
+; SDAG-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:16
+; SDAG-NEXT:    s_setpc_b64 s[4:5]
 ;
 ; GFX11-LABEL: tail_call_byval_align16:
 ; GFX11:       ; %bb.0: ; %entry
@@ -5749,6 +6804,23 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
 ; HSA-NEXT:    s_waitcnt vmcnt(1)
 ; HSA-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:16
 ; HSA-NEXT:    s_setpc_b64 s[4:5]
+;
+; GISEL-LABEL: tail_call_byval_align16:
+; GISEL:       ; %bb.0: ; %entry
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    buffer_load_dword v31, off, s[0:3], s32
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    buffer_store_dword v31, off, s[0:3], s32
+; GISEL-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:28
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:20
+; GISEL-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:24
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:16
+; GISEL-NEXT:    s_setpc_b64 s[4:5]
 entry:
   %alloca = alloca double, align 8, addrspace(5)
   tail call void @byval_align16_f64_arg(<32 x i32> %val, ptr addrspace(5) byval(double) align 16 %alloca)
diff --git a/llvm/test/CodeGen/AMDGPU/call-c-function.ll b/llvm/test/CodeGen/AMDGPU/call-c-function.ll
index e1bb3eab25efd..4fbc7271ba0c5 100644
--- a/llvm/test/CodeGen/AMDGPU/call-c-function.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-c-function.ll
@@ -1,21 +1,68 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel=0 -stop-after=finalize-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
+; RUN: llc -global-isel=0 -stop-after=finalize-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=SDAG -enable-var-scope %s
+; RUN: llc -global-isel=1 -stop-after=finalize-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GISEL -enable-var-scope %s
 
 ; Test that we don't explode on calls from shaders to functions with the C calling convention.
 
 define amdgpu_ps void @amdgpu_ps_call_default_cc() {
-  ; CHECK-LABEL: name: amdgpu_ps_call_default_cc
-  ; CHECK: bb.0.main_body:
-  ; CHECK-NEXT:   S_ENDPGM 0
+  ; SDAG-LABEL: name: amdgpu_ps_call_default_cc
+  ; SDAG: bb.0.main_body:
+  ; SDAG-NEXT:   S_ENDPGM 0
+  ;
+  ; GISEL-LABEL: name: amdgpu_ps_call_default_cc
+  ; GISEL: bb.1.main_body:
+  ; GISEL-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; GISEL-NEXT:   [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; GISEL-NEXT:   [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; GISEL-NEXT:   [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; GISEL-NEXT:   [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
+  ; GISEL-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]]
+  ; GISEL-NEXT:   $sgpr4_sgpr5 = COPY [[DEF]]
+  ; GISEL-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]]
+  ; GISEL-NEXT:   [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
+  ; GISEL-NEXT:   $sgpr8_sgpr9 = COPY [[S_MOV_B]]
+  ; GISEL-NEXT:   $sgpr10_sgpr11 = COPY [[DEF1]]
+  ; GISEL-NEXT:   $sgpr12 = COPY [[DEF2]]
+  ; GISEL-NEXT:   $sgpr13 = COPY [[DEF2]]
+  ; GISEL-NEXT:   $sgpr14 = COPY [[DEF2]]
+  ; GISEL-NEXT:   $sgpr15 = COPY [[DEF2]]
+  ; GISEL-NEXT:   $vgpr31 = COPY [[DEF2]]
+  ; GISEL-NEXT:   [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
+  ; GISEL-NEXT:   $sgpr30_sgpr31 = noconvergent SI_CALL [[S_MOV_B1]], 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+  ; GISEL-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; GISEL-NEXT:   S_ENDPGM 0
 main_body:
   call void null()
   ret void
 }
 
 define amdgpu_gfx void @amdgpu_gfx_call_default_cc() {
-  ; CHECK-LABEL: name: amdgpu_gfx_call_default_cc
-  ; CHECK: bb.0.main_body:
-  ; CHECK-NEXT:   SI_RETURN
+  ; SDAG-LABEL: name: amdgpu_gfx_call_default_cc
+  ; SDAG: bb.0.main_body:
+  ; SDAG-NEXT:   SI_RETURN
+  ;
+  ; GISEL-LABEL: name: amdgpu_gfx_call_default_cc
+  ; GISEL: bb.1.main_body:
+  ; GISEL-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; GISEL-NEXT:   [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; GISEL-NEXT:   [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; GISEL-NEXT:   [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; GISEL-NEXT:   [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GISEL-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]]
+  ; GISEL-NEXT:   $sgpr4_sgpr5 = COPY [[DEF]]
+  ; GISEL-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]]
+  ; GISEL-NEXT:   [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
+  ; GISEL-NEXT:   $sgpr8_sgpr9 = COPY [[S_MOV_B]]
+  ; GISEL-NEXT:   $sgpr10_sgpr11 = COPY [[DEF1]]
+  ; GISEL-NEXT:   $sgpr12 = COPY [[DEF2]]
+  ; GISEL-NEXT:   $sgpr13 = COPY [[DEF2]]
+  ; GISEL-NEXT:   $sgpr14 = COPY [[DEF2]]
+  ; GISEL-NEXT:   $sgpr15 = COPY [[DEF2]]
+  ; GISEL-NEXT:   $vgpr31 = COPY [[DEF2]]
+  ; GISEL-NEXT:   [[S_MOV_B1:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 0
+  ; GISEL-NEXT:   $sgpr30_sgpr31 = noconvergent SI_CALL [[S_MOV_B1]], 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+  ; GISEL-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; GISEL-NEXT:   SI_RETURN
 main_body:
   call void null()
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/call-constexpr.ll b/llvm/test/CodeGen/AMDGPU/call-constexpr.ll
index 5f324df30f7e2..fe0b0188d2d37 100644
--- a/llvm/test/CodeGen/AMDGPU/call-constexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-constexpr.ll
@@ -1,84 +1,341 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=GCN %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=GCN,SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=GCN,GISEL %s
 
-; GCN-LABEL: {{^}}test_bitcast_return_type_noinline:
-; GCN: s_getpc_b64
-; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_noinline@rel32@lo+4
-; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_noinline@rel32@hi+12
-; GCN: s_swappc_b64
 define amdgpu_kernel void @test_bitcast_return_type_noinline() #0 {
+; SDAG-LABEL: test_bitcast_return_type_noinline:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; SDAG-NEXT:    s_add_i32 s12, s12, s17
+; SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; SDAG-NEXT:    s_add_u32 s0, s0, s17
+; SDAG-NEXT:    s_addc_u32 s1, s1, 0
+; SDAG-NEXT:    s_mov_b32 s13, s15
+; SDAG-NEXT:    s_mov_b32 s12, s14
+; SDAG-NEXT:    s_getpc_b64 s[18:19]
+; SDAG-NEXT:    s_add_u32 s18, s18, ret_i32_noinline@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s19, s19, ret_i32_noinline@rel32@hi+12
+; SDAG-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; SDAG-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG-NEXT:    v_or_b32_e32 v31, v0, v2
+; SDAG-NEXT:    s_mov_b32 s14, s16
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; SDAG-NEXT:    v_add_f32_e32 v0, 1.0, v0
+; SDAG-NEXT:    flat_store_dword v[0:1], v0
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_bitcast_return_type_noinline:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GISEL-NEXT:    s_add_i32 s12, s12, s17
+; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL-NEXT:    s_add_u32 s0, s0, s17
+; GISEL-NEXT:    s_addc_u32 s1, s1, 0
+; GISEL-NEXT:    s_mov_b32 s13, s15
+; GISEL-NEXT:    s_mov_b32 s12, s14
+; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
+; GISEL-NEXT:    v_or_b32_e32 v31, v0, v2
+; GISEL-NEXT:    s_getpc_b64 s[18:19]
+; GISEL-NEXT:    s_add_u32 s18, s18, ret_i32_noinline@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s19, s19, ret_i32_noinline@rel32@hi+12
+; GISEL-NEXT:    s_mov_b32 s14, s16
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GISEL-NEXT:    v_add_f32_e32 v0, 1.0, v0
+; GISEL-NEXT:    flat_store_dword v[0:1], v0
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    s_endpgm
   %val = call float @ret_i32_noinline()
   %op = fadd float %val, 1.0
   store volatile float %op, ptr addrspace(1) poison
   ret void
 }
 
-; GCN-LABEL: {{^}}test_bitcast_return_type_alwaysinline:
-; GCN: s_swappc_b64
 define amdgpu_kernel void @test_bitcast_return_type_alwaysinline() #0 {
+; SDAG-LABEL: test_bitcast_return_type_alwaysinline:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; SDAG-NEXT:    s_add_i32 s12, s12, s17
+; SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; SDAG-NEXT:    s_add_u32 s0, s0, s17
+; SDAG-NEXT:    s_addc_u32 s1, s1, 0
+; SDAG-NEXT:    s_mov_b32 s13, s15
+; SDAG-NEXT:    s_mov_b32 s12, s14
+; SDAG-NEXT:    s_getpc_b64 s[18:19]
+; SDAG-NEXT:    s_add_u32 s18, s18, ret_i32_alwaysinline@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s19, s19, ret_i32_alwaysinline@rel32@hi+12
+; SDAG-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; SDAG-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG-NEXT:    v_or_b32_e32 v31, v0, v2
+; SDAG-NEXT:    s_mov_b32 s14, s16
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; SDAG-NEXT:    v_add_f32_e32 v0, 1.0, v0
+; SDAG-NEXT:    flat_store_dword v[0:1], v0
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_bitcast_return_type_alwaysinline:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GISEL-NEXT:    s_add_i32 s12, s12, s17
+; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL-NEXT:    s_add_u32 s0, s0, s17
+; GISEL-NEXT:    s_addc_u32 s1, s1, 0
+; GISEL-NEXT:    s_mov_b32 s13, s15
+; GISEL-NEXT:    s_mov_b32 s12, s14
+; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
+; GISEL-NEXT:    v_or_b32_e32 v31, v0, v2
+; GISEL-NEXT:    s_getpc_b64 s[18:19]
+; GISEL-NEXT:    s_add_u32 s18, s18, ret_i32_alwaysinline@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s19, s19, ret_i32_alwaysinline@rel32@hi+12
+; GISEL-NEXT:    s_mov_b32 s14, s16
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GISEL-NEXT:    v_add_f32_e32 v0, 1.0, v0
+; GISEL-NEXT:    flat_store_dword v[0:1], v0
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    s_endpgm
   %val = call float @ret_i32_alwaysinline()
   %op = fadd float %val, 1.0
   store volatile float %op, ptr addrspace(1) poison
   ret void
 }
 
-; GCN-LABEL: {{^}}test_bitcast_argument_type:
-; GCN: s_getpc_b64
-; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4
-; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12
-; GCN: s_swappc_b64
 define amdgpu_kernel void @test_bitcast_argument_type() #0 {
+; SDAG-LABEL: test_bitcast_argument_type:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; SDAG-NEXT:    s_add_i32 s12, s12, s17
+; SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; SDAG-NEXT:    s_add_u32 s0, s0, s17
+; SDAG-NEXT:    s_addc_u32 s1, s1, 0
+; SDAG-NEXT:    s_mov_b32 s13, s15
+; SDAG-NEXT:    s_mov_b32 s12, s14
+; SDAG-NEXT:    s_getpc_b64 s[18:19]
+; SDAG-NEXT:    s_add_u32 s18, s18, ident_i32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s19, s19, ident_i32@rel32@hi+12
+; SDAG-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; SDAG-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG-NEXT:    v_or_b32_e32 v31, v0, v2
+; SDAG-NEXT:    v_mov_b32_e32 v0, 2.0
+; SDAG-NEXT:    s_mov_b32 s14, s16
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
+; SDAG-NEXT:    flat_store_dword v[0:1], v0
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_bitcast_argument_type:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GISEL-NEXT:    s_add_i32 s12, s12, s17
+; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL-NEXT:    s_add_u32 s0, s0, s17
+; GISEL-NEXT:    s_addc_u32 s1, s1, 0
+; GISEL-NEXT:    s_mov_b32 s13, s15
+; GISEL-NEXT:    s_mov_b32 s12, s14
+; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GISEL-NEXT:    s_getpc_b64 s[18:19]
+; GISEL-NEXT:    s_add_u32 s18, s18, ident_i32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s19, s19, ident_i32@rel32@hi+12
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
+; GISEL-NEXT:    v_or_b32_e32 v31, v0, v2
+; GISEL-NEXT:    v_mov_b32_e32 v0, 2.0
+; GISEL-NEXT:    s_mov_b32 s14, s16
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
+; GISEL-NEXT:    flat_store_dword v[0:1], v0
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    s_endpgm
   %val = call i32 @ident_i32(float 2.0)
   %op = add i32 %val, 1
   store volatile i32 %op, ptr addrspace(1) poison
   ret void
 }
 
-; GCN-LABEL: {{^}}test_bitcast_argument_and_return_types:
-; GCN: s_getpc_b64
-; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4
-; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12
-; GCN: s_swappc_b64
 define amdgpu_kernel void @test_bitcast_argument_and_return_types() #0 {
+; SDAG-LABEL: test_bitcast_argument_and_return_types:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; SDAG-NEXT:    s_add_i32 s12, s12, s17
+; SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; SDAG-NEXT:    s_add_u32 s0, s0, s17
+; SDAG-NEXT:    s_addc_u32 s1, s1, 0
+; SDAG-NEXT:    s_mov_b32 s13, s15
+; SDAG-NEXT:    s_mov_b32 s12, s14
+; SDAG-NEXT:    s_getpc_b64 s[18:19]
+; SDAG-NEXT:    s_add_u32 s18, s18, ident_i32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s19, s19, ident_i32@rel32@hi+12
+; SDAG-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; SDAG-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG-NEXT:    v_or_b32_e32 v31, v0, v2
+; SDAG-NEXT:    v_mov_b32_e32 v0, 2.0
+; SDAG-NEXT:    s_mov_b32 s14, s16
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; SDAG-NEXT:    v_add_f32_e32 v0, 1.0, v0
+; SDAG-NEXT:    flat_store_dword v[0:1], v0
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_bitcast_argument_and_return_types:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GISEL-NEXT:    s_add_i32 s12, s12, s17
+; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL-NEXT:    s_add_u32 s0, s0, s17
+; GISEL-NEXT:    s_addc_u32 s1, s1, 0
+; GISEL-NEXT:    s_mov_b32 s13, s15
+; GISEL-NEXT:    s_mov_b32 s12, s14
+; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GISEL-NEXT:    s_getpc_b64 s[18:19]
+; GISEL-NEXT:    s_add_u32 s18, s18, ident_i32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s19, s19, ident_i32@rel32@hi+12
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
+; GISEL-NEXT:    v_or_b32_e32 v31, v0, v2
+; GISEL-NEXT:    v_mov_b32_e32 v0, 2.0
+; GISEL-NEXT:    s_mov_b32 s14, s16
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GISEL-NEXT:    v_add_f32_e32 v0, 1.0, v0
+; GISEL-NEXT:    flat_store_dword v[0:1], v0
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    s_endpgm
   %val = call float @ident_i32(float 2.0)
   %op = fadd float %val, 1.0
   store volatile float %op, ptr addrspace(1) poison
   ret void
 }
 
-; GCN-LABEL: {{^}}use_workitem_id_x:
-; GCN: s_waitcnt
-; GCN-NEXT: v_and_b32_e32 [[TMP:v[0-9]+]], 0x3ff, v31
-; GCN-NEXT: v_add_i32_e32 v0, vcc, [[TMP]], v0
-; GCN-NEXT: s_setpc_b64
 define hidden i32 @use_workitem_id_x(i32 %arg0) #3 {
+; GCN-LABEL: use_workitem_id_x:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v1, 0x3ff, v31
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %id = call i32 @llvm.amdgcn.workitem.id.x()
   %op = add i32 %id, %arg0
   ret i32 %op
 }
 
-; GCN-LABEL: {{^}}test_bitcast_use_workitem_id_x:
-; GCN: v_mov_b32_e32 v31, v0
-; GCN: s_getpc_b64
-; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, use_workitem_id_x@rel32@lo+4
-; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, use_workitem_id_x@rel32@hi+12
-; GCN: v_mov_b32_e32 v0, 9
-; GCN: s_swappc_b64
-; GCN: v_add_f32_e32
 define amdgpu_kernel void @test_bitcast_use_workitem_id_x() #3 {
+; SDAG-LABEL: test_bitcast_use_workitem_id_x:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; SDAG-NEXT:    s_add_i32 s12, s12, s17
+; SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; SDAG-NEXT:    s_add_u32 s0, s0, s17
+; SDAG-NEXT:    s_addc_u32 s1, s1, 0
+; SDAG-NEXT:    s_mov_b32 s13, s15
+; SDAG-NEXT:    s_mov_b32 s12, s14
+; SDAG-NEXT:    v_mov_b32_e32 v31, v0
+; SDAG-NEXT:    s_getpc_b64 s[18:19]
+; SDAG-NEXT:    s_add_u32 s18, s18, use_workitem_id_x@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s19, s19, use_workitem_id_x@rel32@hi+12
+; SDAG-NEXT:    v_mov_b32_e32 v0, 9
+; SDAG-NEXT:    s_mov_b32 s14, s16
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; SDAG-NEXT:    v_add_f32_e32 v0, 1.0, v0
+; SDAG-NEXT:    flat_store_dword v[0:1], v0
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_bitcast_use_workitem_id_x:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GISEL-NEXT:    s_add_i32 s12, s12, s17
+; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL-NEXT:    s_add_u32 s0, s0, s17
+; GISEL-NEXT:    s_addc_u32 s1, s1, 0
+; GISEL-NEXT:    v_mov_b32_e32 v31, v0
+; GISEL-NEXT:    s_mov_b32 s13, s15
+; GISEL-NEXT:    s_mov_b32 s12, s14
+; GISEL-NEXT:    s_getpc_b64 s[18:19]
+; GISEL-NEXT:    s_add_u32 s18, s18, use_workitem_id_x@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s19, s19, use_workitem_id_x@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 9
+; GISEL-NEXT:    s_mov_b32 s14, s16
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GISEL-NEXT:    v_add_f32_e32 v0, 1.0, v0
+; GISEL-NEXT:    flat_store_dword v[0:1], v0
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    s_endpgm
   %val = call float @use_workitem_id_x(i32 9)
   %op = fadd float %val, 1.0
   store volatile float %op, ptr addrspace(1) poison
   ret void
 }
 
-; GCN-LABEL: {{^}}test_invoke:
-; GCN: s_getpc_b64
-; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4
-; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12
-; GCN: s_swappc_b64
 @_ZTIi = external global ptr
 declare i32 @__gxx_personality_v0(...)
 define amdgpu_kernel void @test_invoke() #0 personality ptr @__gxx_personality_v0 {
+; SDAG-LABEL: test_invoke:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; SDAG-NEXT:    s_add_i32 s12, s12, s17
+; SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; SDAG-NEXT:    s_add_u32 s0, s0, s17
+; SDAG-NEXT:    s_addc_u32 s1, s1, 0
+; SDAG-NEXT:    s_mov_b32 s13, s15
+; SDAG-NEXT:    s_mov_b32 s12, s14
+; SDAG-NEXT:    s_getpc_b64 s[18:19]
+; SDAG-NEXT:    s_add_u32 s18, s18, ident_i32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s19, s19, ident_i32@rel32@hi+12
+; SDAG-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; SDAG-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG-NEXT:    v_or_b32_e32 v31, v0, v2
+; SDAG-NEXT:    v_mov_b32_e32 v0, 2.0
+; SDAG-NEXT:    s_mov_b32 s14, s16
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; SDAG-NEXT:    v_add_f32_e32 v0, 1.0, v0
+; SDAG-NEXT:    flat_store_dword v[0:1], v0
+; SDAG-NEXT:    s_waitcnt vmcnt(0)
+; SDAG-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_invoke:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GISEL-NEXT:    s_add_i32 s12, s12, s17
+; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL-NEXT:    s_add_u32 s0, s0, s17
+; GISEL-NEXT:    s_addc_u32 s1, s1, 0
+; GISEL-NEXT:    s_mov_b32 s13, s15
+; GISEL-NEXT:    s_mov_b32 s12, s14
+; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GISEL-NEXT:    s_getpc_b64 s[18:19]
+; GISEL-NEXT:    s_add_u32 s18, s18, ident_i32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s19, s19, ident_i32@rel32@hi+12
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
+; GISEL-NEXT:    v_or_b32_e32 v31, v0, v2
+; GISEL-NEXT:    v_mov_b32_e32 v0, 2.0
+; GISEL-NEXT:    s_mov_b32 s14, s16
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GISEL-NEXT:    v_add_f32_e32 v0, 1.0, v0
+; GISEL-NEXT:    flat_store_dword v[0:1], v0
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    s_endpgm
   %val = invoke float @ident_i32(float 2.0)
           to label %continue unwind label %broken
 
@@ -96,14 +353,28 @@ continue:
 ; arguments before we lower any calls to them.
 
 define hidden i32 @ret_i32_noinline() #0 {
+; GCN-LABEL: ret_i32_noinline:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v0, 4
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   ret i32 4
 }
 
 define hidden i32 @ret_i32_alwaysinline() #1 {
+; GCN-LABEL: ret_i32_alwaysinline:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v0, 4
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   ret i32 4
 }
 
 define hidden i32 @ident_i32(i32 %i) #0 {
+; GCN-LABEL: ident_i32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   ret i32 %i
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll b/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll
index ffe536d347c53..4b5a49fc0c2e9 100644
--- a/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -simplify-mir -stop-after=finalize-isel < %s | FileCheck %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -simplify-mir -stop-after=finalize-isel < %s | FileCheck -check-prefixes=SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -simplify-mir -stop-after=finalize-isel < %s | FileCheck -check-prefixes=GISEL %s
 
 ; Check that call / asm get an implicit-def $mode added to them in
 ; strictfp functions.
@@ -7,46 +8,80 @@
 declare protected void @maybe_defs_mode() #0
 
 define float @call_changes_mode(float %x, float %y) #0 {
-  ; CHECK-LABEL: name: call_changes_mode
-  ; CHECK: bb.0 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
-  ; CHECK-NEXT:   [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @maybe_defs_mode, target-flags(amdgpu-rel32-hi) @maybe_defs_mode, implicit-def dead $scc
-  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]]
-  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL killed [[SI_PC_ADD_REL_OFFSET]], @maybe_defs_mode, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $mode
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
-  ; CHECK-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
-  ; CHECK-NEXT:   $vgpr0 = COPY [[V_ADD_F32_e64_]]
-  ; CHECK-NEXT:   SI_RETURN implicit $vgpr0
+  ; SDAG-LABEL: name: call_changes_mode
+  ; SDAG: bb.0 (%ir-block.0):
+  ; SDAG-NEXT:   liveins: $vgpr0, $vgpr1
+  ; SDAG-NEXT: {{  $}}
+  ; SDAG-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; SDAG-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; SDAG-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; SDAG-NEXT:   [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @maybe_defs_mode, target-flags(amdgpu-rel32-hi) @maybe_defs_mode, implicit-def dead $scc
+  ; SDAG-NEXT:   [[COPY2:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; SDAG-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]]
+  ; SDAG-NEXT:   $sgpr30_sgpr31 = SI_CALL killed [[SI_PC_ADD_REL_OFFSET]], @maybe_defs_mode, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $mode
+  ; SDAG-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; SDAG-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+  ; SDAG-NEXT:   $vgpr0 = COPY [[V_ADD_F32_e64_]]
+  ; SDAG-NEXT:   SI_RETURN implicit $vgpr0
+  ;
+  ; GISEL-LABEL: name: call_changes_mode
+  ; GISEL: bb.1 (%ir-block.0):
+  ; GISEL-NEXT:   liveins: $vgpr0, $vgpr1
+  ; GISEL-NEXT: {{  $}}
+  ; GISEL-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GISEL-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GISEL-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; GISEL-NEXT:   [[COPY2:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GISEL-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]]
+  ; GISEL-NEXT:   [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @maybe_defs_mode, target-flags(amdgpu-rel32-hi) @maybe_defs_mode, implicit-def $scc
+  ; GISEL-NEXT:   $sgpr30_sgpr31 = noconvergent SI_CALL [[SI_PC_ADD_REL_OFFSET]], @maybe_defs_mode, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GISEL-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; GISEL-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+  ; GISEL-NEXT:   $vgpr0 = COPY [[V_ADD_F32_e64_]]
+  ; GISEL-NEXT:   SI_RETURN implicit $vgpr0
   call void @maybe_defs_mode()
   %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore")
   ret float %val
 }
 
 define void @tail_call_changes_mode() #0 {
-  ; CHECK-LABEL: name: tail_call_changes_mode
-  ; CHECK: bb.0 (%ir-block.0):
-  ; CHECK-NEXT:   [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:ccr_sgpr_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @maybe_defs_mode, target-flags(amdgpu-rel32-hi) @maybe_defs_mode, implicit-def dead $scc
-  ; CHECK-NEXT:   SI_TCRETURN killed [[SI_PC_ADD_REL_OFFSET]], @maybe_defs_mode, 0, csr_amdgpu, implicit-def $mode
+  ; SDAG-LABEL: name: tail_call_changes_mode
+  ; SDAG: bb.0 (%ir-block.0):
+  ; SDAG-NEXT:   [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:ccr_sgpr_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @maybe_defs_mode, target-flags(amdgpu-rel32-hi) @maybe_defs_mode, implicit-def dead $scc
+  ; SDAG-NEXT:   SI_TCRETURN killed [[SI_PC_ADD_REL_OFFSET]], @maybe_defs_mode, 0, csr_amdgpu, implicit-def $mode
+  ;
+  ; GISEL-LABEL: name: tail_call_changes_mode
+  ; GISEL: bb.1 (%ir-block.0):
+  ; GISEL-NEXT:   [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GISEL-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]]
+  ; GISEL-NEXT:   [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:ccr_sgpr_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @maybe_defs_mode, target-flags(amdgpu-rel32-hi) @maybe_defs_mode, implicit-def $scc
+  ; GISEL-NEXT:   SI_TCRETURN [[SI_PC_ADD_REL_OFFSET]], @maybe_defs_mode, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3
   tail call void @maybe_defs_mode()
   ret void
 }
 
 define float @asm_changes_mode(float %x, float %y) #0 {
-  ; CHECK-LABEL: name: asm_changes_mode
-  ; CHECK: bb.0 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; CHECK-NEXT:   INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode
-  ; CHECK-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
-  ; CHECK-NEXT:   $vgpr0 = COPY [[V_ADD_F32_e64_]]
-  ; CHECK-NEXT:   SI_RETURN implicit $vgpr0
+  ; SDAG-LABEL: name: asm_changes_mode
+  ; SDAG: bb.0 (%ir-block.0):
+  ; SDAG-NEXT:   liveins: $vgpr0, $vgpr1
+  ; SDAG-NEXT: {{  $}}
+  ; SDAG-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; SDAG-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; SDAG-NEXT:   INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode
+  ; SDAG-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+  ; SDAG-NEXT:   $vgpr0 = COPY [[V_ADD_F32_e64_]]
+  ; SDAG-NEXT:   SI_RETURN implicit $vgpr0
+  ;
+  ; GISEL-LABEL: name: asm_changes_mode
+  ; GISEL: bb.1 (%ir-block.0):
+  ; GISEL-NEXT:   liveins: $vgpr0, $vgpr1
+  ; GISEL-NEXT: {{  $}}
+  ; GISEL-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GISEL-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GISEL-NEXT:   INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode
+  ; GISEL-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+  ; GISEL-NEXT:   $vgpr0 = COPY [[V_ADD_F32_e64_]]
+  ; GISEL-NEXT:   SI_RETURN implicit $vgpr0
   call void asm sideeffect "; maybe defs mode", ""()
   %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore")
   ret float %val
diff --git a/llvm/test/CodeGen/AMDGPU/call-encoding.ll b/llvm/test/CodeGen/AMDGPU/call-encoding.ll
index 6954c340ca287..6c36c2424a66e 100644
--- a/llvm/test/CodeGen/AMDGPU/call-encoding.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-encoding.ll
@@ -1,5 +1,7 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=fiji -d - | FileCheck --check-prefix=GCN %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx900 -d - | FileCheck --check-prefix=GCN %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=fiji -d - | FileCheck --check-prefix=GCN %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx900 -d - | FileCheck --check-prefix=GCN %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=fiji -d - | FileCheck --check-prefix=GCN %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx900 -d - | FileCheck --check-prefix=GCN %s
 ; XUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=hawaii -d - | FileCheck --check-prefixes=GCN,CI %s
 
 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
index 4df10497bcd27..b250227735bd3 100644
--- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
@@ -1,8 +1,13 @@
-; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN,CI %s
-; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN-V5 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN-V5 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
-; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 | FileCheck -check-prefixes=GCN,VI,VI-BUG %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN,CI %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN-V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN-V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 | FileCheck -check-prefixes=GCN,VI,VI-BUG %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN,CI %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN-V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 | FileCheck -check-prefixes=GCN-V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 | FileCheck -check-prefixes=GCN,VI,VI-BUG %s
 
 ; Make sure to run a GPU with the SGPR allocation bug.
 
diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
index 61a195f9c314f..aed1079158154 100644
--- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
@@ -1,8 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF,SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF,SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF,SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR,SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF,GISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF,GISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF,GISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR,GISEL %s
 
 declare hidden void @external_void_func_void() #3
 
@@ -223,41 +227,6 @@ define hidden void @void_func_void_clobber_vcc() #2 {
 }
 
 define amdgpu_kernel void @test_call_void_func_void_clobber_vcc(ptr addrspace(1) %out) #0 {
-; FLATSCR-LABEL: test_call_void_func_void_clobber_vcc:
-; FLATSCR:       ; %bb.0:
-; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
-; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
-; FLATSCR-NEXT:    s_add_u32 s8, s4, 8
-; FLATSCR-NEXT:    s_addc_u32 s9, s5, 0
-; FLATSCR-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
-; FLATSCR-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; FLATSCR-NEXT:    s_mov_b32 s14, s12
-; FLATSCR-NEXT:    s_mov_b32 s13, s11
-; FLATSCR-NEXT:    s_mov_b32 s12, s10
-; FLATSCR-NEXT:    s_mov_b64 s[10:11], s[6:7]
-; FLATSCR-NEXT:    s_getpc_b64 s[16:17]
-; FLATSCR-NEXT:    s_add_u32 s16, s16, void_func_void_clobber_vcc@rel32@lo+4
-; FLATSCR-NEXT:    s_addc_u32 s17, s17, void_func_void_clobber_vcc@rel32@hi+12
-; FLATSCR-NEXT:    v_or3_b32 v31, v0, v1, v2
-; FLATSCR-NEXT:    s_mov_b64 s[4:5], s[0:1]
-; FLATSCR-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; FLATSCR-NEXT:    s_mov_b32 s32, 0
-; FLATSCR-NEXT:    ;;#ASMSTART
-; FLATSCR-NEXT:    ; def vcc
-; FLATSCR-NEXT:    ;;#ASMEND
-; FLATSCR-NEXT:    s_mov_b64 s[34:35], vcc
-; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; FLATSCR-NEXT:    global_load_dword v0, v[0:1], off glc
-; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    s_mov_b64 vcc, s[34:35]
-; FLATSCR-NEXT:    global_load_dword v0, v[0:1], off glc
-; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    ; kill: killed $vgpr0_vgpr1
-; FLATSCR-NEXT:    ; kill: killed $vgpr0_vgpr1
-; FLATSCR-NEXT:    ;;#ASMSTART
-; FLATSCR-NEXT:    ; use vcc
-; FLATSCR-NEXT:    ;;#ASMEND
-; FLATSCR-NEXT:    s_endpgm
   %vcc = call i64 asm sideeffect "; def $0", "={vcc}"()
   call void @void_func_void_clobber_vcc()
   %val0 = load volatile i32, ptr addrspace(1) poison
@@ -463,51 +432,11 @@ define hidden void @void_func_void_clobber_s34() #2 {
 }
 
 define amdgpu_kernel void @test_call_void_func_void_clobber_s33() #0 {
-; FLATSCR-LABEL: test_call_void_func_void_clobber_s33:
-; FLATSCR:       ; %bb.0:
-; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
-; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
-; FLATSCR-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
-; FLATSCR-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; FLATSCR-NEXT:    s_mov_b32 s14, s12
-; FLATSCR-NEXT:    s_mov_b32 s13, s11
-; FLATSCR-NEXT:    s_mov_b32 s12, s10
-; FLATSCR-NEXT:    s_mov_b64 s[10:11], s[6:7]
-; FLATSCR-NEXT:    s_mov_b64 s[8:9], s[4:5]
-; FLATSCR-NEXT:    s_getpc_b64 s[16:17]
-; FLATSCR-NEXT:    s_add_u32 s16, s16, void_func_void_clobber_s33@rel32@lo+4
-; FLATSCR-NEXT:    s_addc_u32 s17, s17, void_func_void_clobber_s33@rel32@hi+12
-; FLATSCR-NEXT:    v_or3_b32 v31, v0, v1, v2
-; FLATSCR-NEXT:    s_mov_b64 s[4:5], s[0:1]
-; FLATSCR-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; FLATSCR-NEXT:    s_mov_b32 s32, 0
-; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; FLATSCR-NEXT:    s_endpgm
   call void @void_func_void_clobber_s33()
   ret void
 }
 
 define amdgpu_kernel void @test_call_void_func_void_clobber_s34() #0 {
-; FLATSCR-LABEL: test_call_void_func_void_clobber_s34:
-; FLATSCR:       ; %bb.0:
-; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s8, s13
-; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
-; FLATSCR-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
-; FLATSCR-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; FLATSCR-NEXT:    s_mov_b32 s14, s12
-; FLATSCR-NEXT:    s_mov_b32 s13, s11
-; FLATSCR-NEXT:    s_mov_b32 s12, s10
-; FLATSCR-NEXT:    s_mov_b64 s[10:11], s[6:7]
-; FLATSCR-NEXT:    s_mov_b64 s[8:9], s[4:5]
-; FLATSCR-NEXT:    s_getpc_b64 s[16:17]
-; FLATSCR-NEXT:    s_add_u32 s16, s16, void_func_void_clobber_s34@rel32@lo+4
-; FLATSCR-NEXT:    s_addc_u32 s17, s17, void_func_void_clobber_s34@rel32@hi+12
-; FLATSCR-NEXT:    v_or3_b32 v31, v0, v1, v2
-; FLATSCR-NEXT:    s_mov_b64 s[4:5], s[0:1]
-; FLATSCR-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; FLATSCR-NEXT:    s_mov_b32 s32, 0
-; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; FLATSCR-NEXT:    s_endpgm
   call void @void_func_void_clobber_s34()
   ret void
 }
@@ -748,3 +677,6 @@ attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind noinline }
 attributes #3 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GISEL: {{.*}}
+; SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/call-return-types.ll b/llvm/test/CodeGen/AMDGPU/call-return-types.ll
index c0f74fd85f0e6..b48cd139347c9 100644
--- a/llvm/test/CodeGen/AMDGPU/call-return-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-return-types.ll
@@ -1,7 +1,10 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX89 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX89 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX89 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX89 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX89 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX89 %s
 
 declare void @external_void_func_void() #0
 
diff --git a/llvm/test/CodeGen/AMDGPU/call-skip.ll b/llvm/test/CodeGen/AMDGPU/call-skip.ll
index ea2bba1673a0b..e2ca278d687be 100644
--- a/llvm/test/CodeGen/AMDGPU/call-skip.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-skip.ll
@@ -1,4 +1,6 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel=0 -mcpu=hawaii < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SDAG %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel=1 -mcpu=hawaii < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GISEL %s
 
 ; A call should be skipped if all lanes are zero, since we don't know
 ; what side effects should be avoided inside the call.
@@ -6,12 +8,37 @@ define hidden void @func() #1 {
   ret void
 }
 
-; GCN-LABEL: {{^}}if_call:
-; GCN: s_and_saveexec_b64
-; GCN-NEXT: s_cbranch_execz [[END:.LBB[0-9]+_[0-9]+]]
-; GCN: s_swappc_b64
-; GCN: [[END]]:
 define void @if_call(i32 %flag) #0 {
+; GCN-LABEL: if_call:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_mov_b32 s20, s33
+; GCN-NEXT:    s_mov_b32 s33, s32
+; GCN-NEXT:    s_xor_saveexec_b64 s[16:17], -1
+; GCN-NEXT:    buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-NEXT:    s_mov_b64 exec, s[16:17]
+; GCN-NEXT:    v_writelane_b32 v1, s30, 0
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT:    s_addk_i32 s32, 0x400
+; GCN-NEXT:    v_writelane_b32 v1, s31, 1
+; GCN-NEXT:    s_and_saveexec_b64 s[16:17], vcc
+; GCN-NEXT:    s_cbranch_execz .LBB1_2
+; GCN-NEXT:  ; %bb.1: ; %call
+; GCN-NEXT:    s_getpc_b64 s[18:19]
+; GCN-NEXT:    s_add_u32 s18, s18, func@rel32@lo+4
+; GCN-NEXT:    s_addc_u32 s19, s19, func@rel32@hi+12
+; GCN-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GCN-NEXT:  .LBB1_2: ; %end
+; GCN-NEXT:    s_or_b64 exec, exec, s[16:17]
+; GCN-NEXT:    v_readlane_b32 s31, v1, 1
+; GCN-NEXT:    v_readlane_b32 s30, v1, 0
+; GCN-NEXT:    s_mov_b32 s32, s33
+; GCN-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN-NEXT:    s_mov_b32 s33, s20
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cc = icmp eq i32 %flag, 0
   br i1 %cc, label %call, label %end
 
@@ -23,12 +50,20 @@ end:
   ret void
 }
 
-; GCN-LABEL: {{^}}if_asm:
-; GCN: s_and_saveexec_b64
-; GCN-NEXT: s_cbranch_execz [[END:.LBB[0-9]+_[0-9]+]]
-; GCN: ; sample asm
-; GCN: [[END]]:
 define void @if_asm(i32 %flag) #0 {
+; GCN-LABEL: if_asm:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GCN-NEXT:    s_cbranch_execz .LBB2_2
+; GCN-NEXT:  ; %bb.1: ; %call
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; sample asm
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:  .LBB2_2: ; %end
+; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %cc = icmp eq i32 %flag, 0
   br i1 %cc, label %call, label %end
 
@@ -40,11 +75,58 @@ end:
   ret void
 }
 
-; GCN-LABEL: {{^}}if_call_kernel:
-; GCN: s_and_saveexec_b64
-; GCN-NEXT: s_cbranch_execz .LBB3_2
-; GCN: s_swappc_b64
 define amdgpu_kernel void @if_call_kernel() #0 {
+; SDAG-LABEL: if_call_kernel:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_add_i32 s12, s12, s17
+; SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; SDAG-NEXT:    s_add_u32 s0, s0, s17
+; SDAG-NEXT:    s_addc_u32 s1, s1, 0
+; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; SDAG-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; SDAG-NEXT:    s_cbranch_execz .LBB3_2
+; SDAG-NEXT:  ; %bb.1: ; %call
+; SDAG-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; SDAG-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG-NEXT:    s_getpc_b64 s[18:19]
+; SDAG-NEXT:    s_add_u32 s18, s18, func@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s19, s19, func@rel32@hi+12
+; SDAG-NEXT:    v_or_b32_e32 v31, v0, v2
+; SDAG-NEXT:    s_mov_b32 s12, s14
+; SDAG-NEXT:    s_mov_b32 s13, s15
+; SDAG-NEXT:    s_mov_b32 s14, s16
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; SDAG-NEXT:  .LBB3_2: ; %end
+; SDAG-NEXT:    s_endpgm
+;
+; GISEL-LABEL: if_call_kernel:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_add_i32 s12, s12, s17
+; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL-NEXT:    s_add_u32 s0, s0, s17
+; GISEL-NEXT:    s_addc_u32 s1, s1, 0
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT:    s_cbranch_execz .LBB3_2
+; GISEL-NEXT:  ; %bb.1: ; %call
+; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
+; GISEL-NEXT:    s_getpc_b64 s[18:19]
+; GISEL-NEXT:    s_add_u32 s18, s18, func@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s19, s19, func@rel32@hi+12
+; GISEL-NEXT:    v_or_b32_e32 v31, v0, v1
+; GISEL-NEXT:    s_mov_b32 s12, s14
+; GISEL-NEXT:    s_mov_b32 s13, s15
+; GISEL-NEXT:    s_mov_b32 s14, s16
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GISEL-NEXT:  .LBB3_2: ; %end
+; GISEL-NEXT:    s_endpgm
   %id = call i32 @llvm.amdgcn.workitem.id.x()
   %cc = icmp eq i32 %id, 0
   br i1 %cc, label %call, label %end
diff --git a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
index 675acd0eedfc5..a52942cae1699 100644
--- a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GISEL %s
 
 ; Load argument depends on waitcnt which should be skipped.
 define amdgpu_kernel void @call_memory_arg_load(ptr addrspace(3) %ptr, i32) #0 {
@@ -27,24 +28,43 @@ define amdgpu_kernel void @call_memory_arg_load(ptr addrspace(3) %ptr, i32) #0 {
 
 ; Memory waitcnt with no register dependence on the call
 define amdgpu_kernel void @call_memory_no_dep(ptr addrspace(1) %ptr, i32) #0 {
-; GCN-LABEL: call_memory_no_dep:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
-; GCN-NEXT:    s_add_u32 flat_scratch_lo, s8, s11
-; GCN-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
-; GCN-NEXT:    s_add_u32 s0, s0, s11
-; GCN-NEXT:    s_addc_u32 s1, s1, 0
-; GCN-NEXT:    v_mov_b32_e32 v0, 0
-; GCN-NEXT:    s_getpc_b64 s[8:9]
-; GCN-NEXT:    s_add_u32 s8, s8, func@rel32@lo+4
-; GCN-NEXT:    s_addc_u32 s9, s9, func@rel32@hi+12
-; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    global_store_dword v0, v0, s[6:7]
-; GCN-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GCN-NEXT:    v_mov_b32_e32 v0, 0
-; GCN-NEXT:    s_mov_b32 s32, 0
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[8:9]
-; GCN-NEXT:    s_endpgm
+; SDAG-LABEL: call_memory_no_dep:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
+; SDAG-NEXT:    s_add_u32 flat_scratch_lo, s8, s11
+; SDAG-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; SDAG-NEXT:    s_add_u32 s0, s0, s11
+; SDAG-NEXT:    s_addc_u32 s1, s1, 0
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-NEXT:    s_getpc_b64 s[8:9]
+; SDAG-NEXT:    s_add_u32 s8, s8, func@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s9, s9, func@rel32@hi+12
+; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-NEXT:    global_store_dword v0, v0, s[6:7]
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[4:5]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; SDAG-NEXT:    s_endpgm
+;
+; GISEL-LABEL: call_memory_no_dep:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GISEL-NEXT:    s_add_u32 flat_scratch_lo, s8, s11
+; GISEL-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; GISEL-NEXT:    s_add_u32 s0, s0, s11
+; GISEL-NEXT:    s_addc_u32 s1, s1, 0
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GISEL-NEXT:    s_getpc_b64 s[8:9]
+; GISEL-NEXT:    s_add_u32 s8, s8, func@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s9, s9, func@rel32@hi+12
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    global_store_dword v0, v0, s[6:7]
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; GISEL-NEXT:    s_endpgm
   store i32 0, ptr addrspace(1) %ptr
   call void @func(i32 0)
   ret void
@@ -52,46 +72,82 @@ define amdgpu_kernel void @call_memory_no_dep(ptr addrspace(1) %ptr, i32) #0 {
 
 ; Should not wait after the call before memory
 define amdgpu_kernel void @call_no_wait_after_call(ptr addrspace(1) %ptr, i32) #0 {
-; GCN-LABEL: call_no_wait_after_call:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_add_u32 flat_scratch_lo, s8, s11
-; GCN-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
-; GCN-NEXT:    s_load_dwordx2 s[34:35], s[6:7], 0x0
-; GCN-NEXT:    s_add_u32 s0, s0, s11
-; GCN-NEXT:    s_addc_u32 s1, s1, 0
-; GCN-NEXT:    s_getpc_b64 s[8:9]
-; GCN-NEXT:    s_add_u32 s8, s8, func@rel32@lo+4
-; GCN-NEXT:    s_addc_u32 s9, s9, func@rel32@hi+12
-; GCN-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GCN-NEXT:    v_mov_b32_e32 v0, 0
-; GCN-NEXT:    s_mov_b32 s32, 0
-; GCN-NEXT:    v_mov_b32_e32 v40, 0
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[8:9]
-; GCN-NEXT:    global_store_dword v40, v40, s[34:35]
-; GCN-NEXT:    s_endpgm
+; SDAG-LABEL: call_no_wait_after_call:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_add_u32 flat_scratch_lo, s8, s11
+; SDAG-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; SDAG-NEXT:    s_load_dwordx2 s[34:35], s[6:7], 0x0
+; SDAG-NEXT:    s_add_u32 s0, s0, s11
+; SDAG-NEXT:    s_addc_u32 s1, s1, 0
+; SDAG-NEXT:    s_getpc_b64 s[8:9]
+; SDAG-NEXT:    s_add_u32 s8, s8, func@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s9, s9, func@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[4:5]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    v_mov_b32_e32 v40, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; SDAG-NEXT:    global_store_dword v40, v40, s[34:35]
+; SDAG-NEXT:    s_endpgm
+;
+; GISEL-LABEL: call_no_wait_after_call:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_add_u32 flat_scratch_lo, s8, s11
+; GISEL-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; GISEL-NEXT:    s_load_dwordx2 s[34:35], s[6:7], 0x0
+; GISEL-NEXT:    s_add_u32 s0, s0, s11
+; GISEL-NEXT:    s_addc_u32 s1, s1, 0
+; GISEL-NEXT:    s_getpc_b64 s[8:9]
+; GISEL-NEXT:    s_add_u32 s8, s8, func@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s9, s9, func@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GISEL-NEXT:    global_store_dword v0, v0, s[34:35]
+; GISEL-NEXT:    s_endpgm
   call void @func(i32 0)
   store i32 0, ptr addrspace(1) %ptr
   ret void
 }
 
 define amdgpu_kernel void @call_no_wait_after_call_return_val(ptr addrspace(1) %ptr, i32) #0 {
-; GCN-LABEL: call_no_wait_after_call_return_val:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_add_u32 flat_scratch_lo, s8, s11
-; GCN-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
-; GCN-NEXT:    s_load_dwordx2 s[34:35], s[6:7], 0x0
-; GCN-NEXT:    s_add_u32 s0, s0, s11
-; GCN-NEXT:    s_addc_u32 s1, s1, 0
-; GCN-NEXT:    s_getpc_b64 s[8:9]
-; GCN-NEXT:    s_add_u32 s8, s8, func.return@rel32@lo+4
-; GCN-NEXT:    s_addc_u32 s9, s9, func.return@rel32@hi+12
-; GCN-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GCN-NEXT:    v_mov_b32_e32 v0, 0
-; GCN-NEXT:    s_mov_b32 s32, 0
-; GCN-NEXT:    v_mov_b32_e32 v40, 0
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[8:9]
-; GCN-NEXT:    global_store_dword v40, v0, s[34:35]
-; GCN-NEXT:    s_endpgm
+; SDAG-LABEL: call_no_wait_after_call_return_val:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_add_u32 flat_scratch_lo, s8, s11
+; SDAG-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; SDAG-NEXT:    s_load_dwordx2 s[34:35], s[6:7], 0x0
+; SDAG-NEXT:    s_add_u32 s0, s0, s11
+; SDAG-NEXT:    s_addc_u32 s1, s1, 0
+; SDAG-NEXT:    s_getpc_b64 s[8:9]
+; SDAG-NEXT:    s_add_u32 s8, s8, func.return@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s9, s9, func.return@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[4:5]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    v_mov_b32_e32 v40, 0
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; SDAG-NEXT:    global_store_dword v40, v0, s[34:35]
+; SDAG-NEXT:    s_endpgm
+;
+; GISEL-LABEL: call_no_wait_after_call_return_val:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_add_u32 flat_scratch_lo, s8, s11
+; GISEL-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; GISEL-NEXT:    s_load_dwordx2 s[34:35], s[6:7], 0x0
+; GISEL-NEXT:    s_add_u32 s0, s0, s11
+; GISEL-NEXT:    s_addc_u32 s1, s1, 0
+; GISEL-NEXT:    s_getpc_b64 s[8:9]
+; GISEL-NEXT:    s_add_u32 s8, s8, func.return@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s9, s9, func.return@rel32@hi+12
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-NEXT:    global_store_dword v1, v0, s[34:35]
+; GISEL-NEXT:    s_endpgm
   %rv = call i32 @func.return(i32 0)
   store i32 %rv, ptr addrspace(1) %ptr
   ret void
@@ -99,22 +155,39 @@ define amdgpu_kernel void @call_no_wait_after_call_return_val(ptr addrspace(1) %
 
 ; Need to wait for the address dependency
 define amdgpu_kernel void @call_got_load(ptr addrspace(1) %ptr, i32) #0 {
-; GCN-LABEL: call_got_load:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_add_u32 flat_scratch_lo, s8, s11
-; GCN-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
-; GCN-NEXT:    s_add_u32 s0, s0, s11
-; GCN-NEXT:    s_addc_u32 s1, s1, 0
-; GCN-NEXT:    s_getpc_b64 s[6:7]
-; GCN-NEXT:    s_add_u32 s6, s6, got.func@gotpcrel32@lo+4
-; GCN-NEXT:    s_addc_u32 s7, s7, got.func@gotpcrel32@hi+12
-; GCN-NEXT:    s_load_dwordx2 s[8:9], s[6:7], 0x0
-; GCN-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GCN-NEXT:    v_mov_b32_e32 v0, 0
-; GCN-NEXT:    s_mov_b32 s32, 0
-; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[8:9]
-; GCN-NEXT:    s_endpgm
+; SDAG-LABEL: call_got_load:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_add_u32 flat_scratch_lo, s8, s11
+; SDAG-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; SDAG-NEXT:    s_add_u32 s0, s0, s11
+; SDAG-NEXT:    s_addc_u32 s1, s1, 0
+; SDAG-NEXT:    s_getpc_b64 s[6:7]
+; SDAG-NEXT:    s_add_u32 s6, s6, got.func@gotpcrel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s7, s7, got.func@gotpcrel32@hi+12
+; SDAG-NEXT:    s_load_dwordx2 s[8:9], s[6:7], 0x0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[4:5]
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; SDAG-NEXT:    s_endpgm
+;
+; GISEL-LABEL: call_got_load:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_add_u32 flat_scratch_lo, s8, s11
+; GISEL-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; GISEL-NEXT:    s_add_u32 s0, s0, s11
+; GISEL-NEXT:    s_addc_u32 s1, s1, 0
+; GISEL-NEXT:    s_getpc_b64 s[6:7]
+; GISEL-NEXT:    s_add_u32 s6, s6, got.func@gotpcrel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s7, s7, got.func@gotpcrel32@hi+12
+; GISEL-NEXT:    s_load_dwordx2 s[8:9], s[6:7], 0x0
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; GISEL-NEXT:    s_endpgm
   call void @got.func(i32 0)
   ret void
 }

>From 646b794abc2ae3ee5e8e7c337afbc49606f116cf Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande <ChinmayDiwakar.Deshpande at amd.com>
Date: Wed, 5 Nov 2025 12:58:57 -0500
Subject: [PATCH 3/4] [NFC][AMDGPU][GISel] Fix test

---
 llvm/test/CodeGen/AMDGPU/call-args-inreg.ll   | 474 +-----------------
 llvm/test/CodeGen/AMDGPU/call-return-types.ll |   4 +-
 2 files changed, 3 insertions(+), 475 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
index 3d3a4041c89bb..f96007ae513bd 100644
--- a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,SDAG %s
 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GISEL %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GISEL %s
 
 declare hidden void @external_void_func_i8_inreg(i8 inreg) #0
@@ -212,35 +212,6 @@ define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 {
 }
 
 define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_i64_inreg:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s18, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[20:21]
-; GFX9-NEXT:    v_writelane_b32 v40, s18, 2
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_getpc_b64 s[18:19]
-; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_i64_inreg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_i64_inreg@rel32@hi+12
-; GFX9-NEXT:    s_mov_b32 s1, s17
-; GFX9-NEXT:    s_mov_b32 s0, s16
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX9-NEXT:    s_mov_b32 s33, s4
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX11-LABEL: test_call_external_void_func_i64_inreg:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -273,35 +244,6 @@ define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 {
 }
 
 define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_v2i32_inreg:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s18, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[20:21]
-; GFX9-NEXT:    v_writelane_b32 v40, s18, 2
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_getpc_b64 s[18:19]
-; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_v2i32_inreg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_v2i32_inreg@rel32@hi+12
-; GFX9-NEXT:    s_mov_b32 s1, s17
-; GFX9-NEXT:    s_mov_b32 s0, s16
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX9-NEXT:    s_mov_b32 s33, s4
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX11-LABEL: test_call_external_void_func_v2i32_inreg:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -334,36 +276,6 @@ define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 {
 }
 
 define void @test_call_external_void_func_v3i32_inreg(<3 x i32> inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_v3i32_inreg:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s19, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[20:21]
-; GFX9-NEXT:    v_writelane_b32 v40, s19, 2
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_getpc_b64 s[20:21]
-; GFX9-NEXT:    s_add_u32 s20, s20, external_void_func_v3i32_inreg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s21, s21, external_void_func_v3i32_inreg@rel32@hi+12
-; GFX9-NEXT:    s_mov_b32 s2, s18
-; GFX9-NEXT:    s_mov_b32 s1, s17
-; GFX9-NEXT:    s_mov_b32 s0, s16
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[20:21]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX9-NEXT:    s_mov_b32 s33, s4
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX11-LABEL: test_call_external_void_func_v3i32_inreg:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -396,37 +308,6 @@ define void @test_call_external_void_func_v3i32_inreg(<3 x i32> inreg %arg) #0 {
 }
 
 define void @test_call_external_void_func_v4i32_inreg(<4 x i32> inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_v4i32_inreg:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s20, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[22:23], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[22:23]
-; GFX9-NEXT:    v_writelane_b32 v40, s20, 2
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_getpc_b64 s[20:21]
-; GFX9-NEXT:    s_add_u32 s20, s20, external_void_func_v4i32_inreg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s21, s21, external_void_func_v4i32_inreg@rel32@hi+12
-; GFX9-NEXT:    s_mov_b32 s3, s19
-; GFX9-NEXT:    s_mov_b32 s2, s18
-; GFX9-NEXT:    s_mov_b32 s1, s17
-; GFX9-NEXT:    s_mov_b32 s0, s16
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[20:21]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX9-NEXT:    s_mov_b32 s33, s4
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX11-LABEL: test_call_external_void_func_v4i32_inreg:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -459,41 +340,6 @@ define void @test_call_external_void_func_v4i32_inreg(<4 x i32> inreg %arg) #0 {
 }
 
 define void @test_call_external_void_func_v8i32_inreg(<8 x i32> inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_v8i32_inreg:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s24, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[26:27], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[26:27]
-; GFX9-NEXT:    v_writelane_b32 v40, s24, 2
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_getpc_b64 s[24:25]
-; GFX9-NEXT:    s_add_u32 s24, s24, external_void_func_v8i32_inreg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s25, s25, external_void_func_v8i32_inreg@rel32@hi+12
-; GFX9-NEXT:    s_mov_b32 s3, s19
-; GFX9-NEXT:    s_mov_b32 s2, s18
-; GFX9-NEXT:    s_mov_b32 s1, s17
-; GFX9-NEXT:    s_mov_b32 s0, s16
-; GFX9-NEXT:    s_mov_b32 s16, s20
-; GFX9-NEXT:    s_mov_b32 s17, s21
-; GFX9-NEXT:    s_mov_b32 s18, s22
-; GFX9-NEXT:    s_mov_b32 s19, s23
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[24:25]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX9-NEXT:    s_mov_b32 s33, s4
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX11-LABEL: test_call_external_void_func_v8i32_inreg:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -646,35 +492,6 @@ define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 {
 }
 
 define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_f64_inreg:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s18, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[20:21]
-; GFX9-NEXT:    v_writelane_b32 v40, s18, 2
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_getpc_b64 s[18:19]
-; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_f64_inreg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_f64_inreg@rel32@hi+12
-; GFX9-NEXT:    s_mov_b32 s1, s17
-; GFX9-NEXT:    s_mov_b32 s0, s16
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX9-NEXT:    s_mov_b32 s33, s4
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX11-LABEL: test_call_external_void_func_f64_inreg:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -767,35 +584,6 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0
 }
 
 define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_v3f16_inreg:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s18, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[20:21]
-; GFX9-NEXT:    v_writelane_b32 v40, s18, 2
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_getpc_b64 s[18:19]
-; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_v3f16_inreg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_v3f16_inreg@rel32@hi+12
-; GFX9-NEXT:    s_mov_b32 s1, s17
-; GFX9-NEXT:    s_mov_b32 s0, s16
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX9-NEXT:    s_mov_b32 s33, s4
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX11-LABEL: test_call_external_void_func_v3f16_inreg:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -828,35 +616,6 @@ define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0
 }
 
 define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_v4f16_inreg:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s18, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[20:21]
-; GFX9-NEXT:    v_writelane_b32 v40, s18, 2
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_getpc_b64 s[18:19]
-; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_v4f16_inreg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_v4f16_inreg@rel32@hi+12
-; GFX9-NEXT:    s_mov_b32 s1, s17
-; GFX9-NEXT:    s_mov_b32 s0, s16
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX9-NEXT:    s_mov_b32 s33, s4
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX11-LABEL: test_call_external_void_func_v4f16_inreg:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -889,35 +648,6 @@ define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0
 }
 
 define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_p0_inreg:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s18, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[20:21]
-; GFX9-NEXT:    v_writelane_b32 v40, s18, 2
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_getpc_b64 s[18:19]
-; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_p0_inreg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_p0_inreg@rel32@hi+12
-; GFX9-NEXT:    s_mov_b32 s1, s17
-; GFX9-NEXT:    s_mov_b32 s0, s16
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX9-NEXT:    s_mov_b32 s33, s4
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX11-LABEL: test_call_external_void_func_p0_inreg:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -950,35 +680,6 @@ define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 {
 }
 
 define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_p1_inreg:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s18, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[20:21]
-; GFX9-NEXT:    v_writelane_b32 v40, s18, 2
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_getpc_b64 s[18:19]
-; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_p1_inreg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_p1_inreg@rel32@hi+12
-; GFX9-NEXT:    s_mov_b32 s1, s17
-; GFX9-NEXT:    s_mov_b32 s0, s16
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX9-NEXT:    s_mov_b32 s33, s4
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX11-LABEL: test_call_external_void_func_p1_inreg:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1071,37 +772,6 @@ define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg)
 }
 
 define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_v2p1_inreg:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s20, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[22:23], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[22:23]
-; GFX9-NEXT:    v_writelane_b32 v40, s20, 2
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_getpc_b64 s[20:21]
-; GFX9-NEXT:    s_add_u32 s20, s20, external_void_func_v2p1_inreg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s21, s21, external_void_func_v2p1_inreg@rel32@hi+12
-; GFX9-NEXT:    s_mov_b32 s3, s19
-; GFX9-NEXT:    s_mov_b32 s2, s18
-; GFX9-NEXT:    s_mov_b32 s1, s17
-; GFX9-NEXT:    s_mov_b32 s0, s16
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[20:21]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX9-NEXT:    s_mov_b32 s33, s4
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX11-LABEL: test_call_external_void_func_v2p1_inreg:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1134,35 +804,6 @@ define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inre
 }
 
 define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inreg %arg) #0 {
-; GFX9-LABEL: test_call_external_void_func_v2p5_inreg:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s18, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[20:21], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[20:21]
-; GFX9-NEXT:    v_writelane_b32 v40, s18, 2
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_getpc_b64 s[18:19]
-; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_v2p5_inreg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_v2p5_inreg@rel32@hi+12
-; GFX9-NEXT:    s_mov_b32 s1, s17
-; GFX9-NEXT:    s_mov_b32 s0, s16
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX9-NEXT:    s_mov_b32 s33, s4
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX11-LABEL: test_call_external_void_func_v2p5_inreg:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1195,38 +836,6 @@ define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inre
 }
 
 define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inreg %arg0, i32 inreg %arg1, i64 inreg %arg2) #0 {
-; GFX9-LABEL: test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s21, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[22:23], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[22:23]
-; GFX9-NEXT:    v_writelane_b32 v40, s21, 2
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_getpc_b64 s[22:23]
-; GFX9-NEXT:    s_add_u32 s22, s22, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s23, s23, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@hi+12
-; GFX9-NEXT:    s_mov_b32 s3, s19
-; GFX9-NEXT:    s_mov_b32 s2, s18
-; GFX9-NEXT:    s_mov_b32 s1, s17
-; GFX9-NEXT:    s_mov_b32 s0, s16
-; GFX9-NEXT:    s_mov_b32 s16, s20
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[22:23]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX9-NEXT:    s_mov_b32 s33, s4
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX11-LABEL: test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1259,46 +868,6 @@ define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inre
 }
 
 define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) #0 {
-; GFX9-LABEL: test_call_external_void_func_a15i32_inreg:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s29, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[40:41], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[40:41]
-; GFX9-NEXT:    v_writelane_b32 v40, s29, 2
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_getpc_b64 s[40:41]
-; GFX9-NEXT:    s_add_u32 s40, s40, external_void_func_a15i32_inreg@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s41, s41, external_void_func_a15i32_inreg@rel32@hi+12
-; GFX9-NEXT:    s_mov_b32 s3, s19
-; GFX9-NEXT:    s_mov_b32 s2, s18
-; GFX9-NEXT:    s_mov_b32 s1, s17
-; GFX9-NEXT:    s_mov_b32 s0, s16
-; GFX9-NEXT:    s_mov_b32 s16, s20
-; GFX9-NEXT:    s_mov_b32 s17, s21
-; GFX9-NEXT:    s_mov_b32 s18, s22
-; GFX9-NEXT:    s_mov_b32 s19, s23
-; GFX9-NEXT:    s_mov_b32 s20, s24
-; GFX9-NEXT:    s_mov_b32 s21, s25
-; GFX9-NEXT:    s_mov_b32 s22, s26
-; GFX9-NEXT:    s_mov_b32 s23, s27
-; GFX9-NEXT:    s_mov_b32 s24, s28
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[40:41]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX9-NEXT:    s_mov_b32 s33, s4
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX11-LABEL: test_call_external_void_func_a15i32_inreg:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1333,47 +902,6 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) #
 
 ; FIXME: This should also fail
 define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inreg %arg0, i32 inreg %arg1) #1 {
-; GFX9-LABEL: test_call_external_void_func_a15i32_inreg_i32_inreg:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s21, s33
-; GFX9-NEXT:    s_mov_b32 s33, s32
-; GFX9-NEXT:    s_or_saveexec_b64 s[22:23], -1
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT:    s_mov_b64 exec, s[22:23]
-; GFX9-NEXT:    v_writelane_b32 v40, s21, 2
-; GFX9-NEXT:    s_addk_i32 s32, 0x400
-; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
-; GFX9-NEXT:    s_getpc_b64 s[22:23]
-; GFX9-NEXT:    s_add_u32 s22, s22, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s23, s23, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12
-; GFX9-NEXT:    s_mov_b32 s3, s7
-; GFX9-NEXT:    s_mov_b32 s2, s6
-; GFX9-NEXT:    s_mov_b32 s1, s5
-; GFX9-NEXT:    s_mov_b32 s0, s4
-; GFX9-NEXT:    s_mov_b32 s4, s8
-; GFX9-NEXT:    s_mov_b32 s5, s9
-; GFX9-NEXT:    s_mov_b32 s6, s10
-; GFX9-NEXT:    s_mov_b32 s7, s11
-; GFX9-NEXT:    s_mov_b32 s8, s15
-; GFX9-NEXT:    s_mov_b32 s9, s16
-; GFX9-NEXT:    s_mov_b32 s10, s17
-; GFX9-NEXT:    s_mov_b32 s11, s18
-; GFX9-NEXT:    s_mov_b32 s15, s19
-; GFX9-NEXT:    s_mov_b32 s16, s20
-; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:    s_swappc_b64 s[30:31], s[22:23]
-; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
-; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
-; GFX9-NEXT:    s_mov_b32 s32, s33
-; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
-; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX9-NEXT:    s_mov_b32 s33, s4
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX11-LABEL: test_call_external_void_func_a15i32_inreg_i32_inreg:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/call-return-types.ll b/llvm/test/CodeGen/AMDGPU/call-return-types.ll
index b48cd139347c9..ecb0e3f3c8f95 100644
--- a/llvm/test/CodeGen/AMDGPU/call-return-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-return-types.ll
@@ -2,9 +2,9 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s
 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX89 %s
 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX11 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX89 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX89 %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX7 %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX89 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,GFX89 %s
 
 declare void @external_void_func_void() #0
 

>From 4a17c23fee37744e7e96acd080950f77e60adc98 Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande <ChinmayDiwakar.Deshpande at amd.com>
Date: Thu, 6 Nov 2025 12:57:01 -0500
Subject: [PATCH 4/4] [NFC][AMDGPU][GISel] Fix rebase

---
 .../CodeGen/AMDGPU/call-argument-types.ll     | 209 +++++++++++++++++-
 1 file changed, 207 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
index 1bc334f27f0fa..c407f7645315d 100644
--- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
@@ -5163,6 +5163,211 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
 }
 
 define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
+; VI-LABEL: test_call_external_void_func_v32i32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
+; VI-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
+; VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
+; VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
+; VI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
+; VI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
+; VI-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
+; VI-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
+; VI-NEXT:    s_mov_b32 s38, -1
+; VI-NEXT:    s_mov_b32 s39, 0xe80000
+; VI-NEXT:    s_add_u32 s36, s36, s3
+; VI-NEXT:    s_addc_u32 s37, s37, 0
+; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT:    s_mov_b32 s32, 0
+; VI-NEXT:    s_getpc_b64 s[8:9]
+; VI-NEXT:    s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
+; VI-NEXT:    s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
+; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT:    s_waitcnt vmcnt(6)
+; VI-NEXT:    buffer_store_dword v31, off, s[36:39], s32
+; VI-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; VI-NEXT:    s_endpgm
+;
+; CI-LABEL: test_call_external_void_func_v32i32:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
+; CI-NEXT:    s_mov_b32 s7, 0xf000
+; CI-NEXT:    s_mov_b32 s6, -1
+; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
+; CI-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
+; CI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
+; CI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
+; CI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
+; CI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
+; CI-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
+; CI-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
+; CI-NEXT:    s_mov_b32 s38, -1
+; CI-NEXT:    s_mov_b32 s39, 0xe8f000
+; CI-NEXT:    s_add_u32 s36, s36, s3
+; CI-NEXT:    s_addc_u32 s37, s37, 0
+; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT:    s_mov_b32 s32, 0
+; CI-NEXT:    s_getpc_b64 s[8:9]
+; CI-NEXT:    s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
+; CI-NEXT:    s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
+; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT:    s_waitcnt vmcnt(6)
+; CI-NEXT:    buffer_store_dword v31, off, s[36:39], s32
+; CI-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; CI-NEXT:    s_endpgm
+;
+; SDAG-LABEL: test_call_external_void_func_v32i32:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
+; SDAG-NEXT:    s_mov_b32 s7, 0xf000
+; SDAG-NEXT:    s_mov_b32 s6, -1
+; SDAG-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SDAG-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SDAG-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
+; SDAG-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
+; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
+; SDAG-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
+; SDAG-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
+; SDAG-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
+; SDAG-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
+; SDAG-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
+; SDAG-NEXT:    s_mov_b32 s38, -1
+; SDAG-NEXT:    s_mov_b32 s39, 0xe00000
+; SDAG-NEXT:    s_add_u32 s36, s36, s3
+; SDAG-NEXT:    s_addc_u32 s37, s37, 0
+; SDAG-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; SDAG-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SDAG-NEXT:    s_mov_b32 s32, 0
+; SDAG-NEXT:    s_getpc_b64 s[8:9]
+; SDAG-NEXT:    s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
+; SDAG-NEXT:    s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
+; SDAG-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SDAG-NEXT:    s_waitcnt vmcnt(6)
+; SDAG-NEXT:    buffer_store_dword v31, off, s[36:39], s32
+; SDAG-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; SDAG-NEXT:    s_endpgm
+;
+; GFX11-LABEL: test_call_external_void_func_v32i32:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
+; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX11-NEXT:    s_mov_b32 s6, -1
+; GFX11-NEXT:    s_mov_b32 s32, 0
+; GFX11-NEXT:    s_getpc_b64 s[2:3]
+; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v32i32@rel32@lo+4
+; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v32i32@rel32@hi+12
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    s_clause 0x7
+; GFX11-NEXT:    buffer_load_b128 v[28:31], off, s[4:7], 0 offset:112
+; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[4:7], 0
+; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16
+; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[4:7], 0 offset:32
+; GFX11-NEXT:    buffer_load_b128 v[12:15], off, s[4:7], 0 offset:48
+; GFX11-NEXT:    buffer_load_b128 v[16:19], off, s[4:7], 0 offset:64
+; GFX11-NEXT:    buffer_load_b128 v[20:23], off, s[4:7], 0 offset:80
+; GFX11-NEXT:    buffer_load_b128 v[24:27], off, s[4:7], 0 offset:96
+; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GFX11-NEXT:    s_waitcnt vmcnt(7)
+; GFX11-NEXT:    scratch_store_b32 off, v31, s32
+; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
+; GFX11-NEXT:    s_endpgm
+;
+; HSA-LABEL: test_call_external_void_func_v32i32:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_add_i32 s6, s6, s9
+; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
+; HSA-NEXT:    s_add_u32 s0, s0, s9
+; HSA-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
+; HSA-NEXT:    s_mov_b32 s11, 0x1100f000
+; HSA-NEXT:    s_mov_b32 s10, -1
+; HSA-NEXT:    s_addc_u32 s1, s1, 0
+; HSA-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-NEXT:    buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:112
+; HSA-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
+; HSA-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
+; HSA-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
+; HSA-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
+; HSA-NEXT:    buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:64
+; HSA-NEXT:    buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:80
+; HSA-NEXT:    buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:96
+; HSA-NEXT:    s_mov_b32 s32, 0
+; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
+; HSA-NEXT:    s_getpc_b64 s[12:13]
+; HSA-NEXT:    s_add_u32 s12, s12, external_void_func_v32i32@rel32@lo+4
+; HSA-NEXT:    s_addc_u32 s13, s13, external_void_func_v32i32@rel32@hi+12
+; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
+; HSA-NEXT:    s_waitcnt vmcnt(7)
+; HSA-NEXT:    buffer_store_dword v31, off, s[0:3], s32
+; HSA-NEXT:    s_swappc_b64 s[30:31], s[12:13]
+; HSA-NEXT:    s_endpgm
+;
+; GISEL-LABEL: test_call_external_void_func_v32i32:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GISEL-NEXT:    s_mov_b32 s52, SCRATCH_RSRC_DWORD0
+; GISEL-NEXT:    s_mov_b32 s53, SCRATCH_RSRC_DWORD1
+; GISEL-NEXT:    s_mov_b32 s54, -1
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    s_load_dwordx16 s[8:23], s[0:1], 0x40
+; GISEL-NEXT:    s_load_dwordx16 s[36:51], s[0:1], 0x0
+; GISEL-NEXT:    s_mov_b32 s55, 0xe00000
+; GISEL-NEXT:    s_add_u32 s52, s52, s3
+; GISEL-NEXT:    s_addc_u32 s53, s53, 0
+; GISEL-NEXT:    s_mov_b32 s32, 0
+; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-NEXT:    v_mov_b32_e32 v0, s23
+; GISEL-NEXT:    s_mov_b64 s[0:1], s[52:53]
+; GISEL-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-NEXT:    s_add_u32 s4, s4, external_void_func_v32i32@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s5, s5, external_void_func_v32i32@rel32@hi+12
+; GISEL-NEXT:    buffer_store_dword v0, off, s[52:55], s32
+; GISEL-NEXT:    v_mov_b32_e32 v0, s36
+; GISEL-NEXT:    v_mov_b32_e32 v1, s37
+; GISEL-NEXT:    v_mov_b32_e32 v2, s38
+; GISEL-NEXT:    v_mov_b32_e32 v3, s39
+; GISEL-NEXT:    v_mov_b32_e32 v4, s40
+; GISEL-NEXT:    v_mov_b32_e32 v5, s41
+; GISEL-NEXT:    v_mov_b32_e32 v6, s42
+; GISEL-NEXT:    v_mov_b32_e32 v7, s43
+; GISEL-NEXT:    v_mov_b32_e32 v8, s44
+; GISEL-NEXT:    v_mov_b32_e32 v9, s45
+; GISEL-NEXT:    v_mov_b32_e32 v10, s46
+; GISEL-NEXT:    v_mov_b32_e32 v11, s47
+; GISEL-NEXT:    v_mov_b32_e32 v12, s48
+; GISEL-NEXT:    v_mov_b32_e32 v13, s49
+; GISEL-NEXT:    v_mov_b32_e32 v14, s50
+; GISEL-NEXT:    v_mov_b32_e32 v15, s51
+; GISEL-NEXT:    v_mov_b32_e32 v16, s8
+; GISEL-NEXT:    v_mov_b32_e32 v17, s9
+; GISEL-NEXT:    v_mov_b32_e32 v18, s10
+; GISEL-NEXT:    v_mov_b32_e32 v19, s11
+; GISEL-NEXT:    s_mov_b64 s[2:3], s[54:55]
+; GISEL-NEXT:    v_mov_b32_e32 v20, s12
+; GISEL-NEXT:    v_mov_b32_e32 v21, s13
+; GISEL-NEXT:    v_mov_b32_e32 v22, s14
+; GISEL-NEXT:    v_mov_b32_e32 v23, s15
+; GISEL-NEXT:    v_mov_b32_e32 v24, s16
+; GISEL-NEXT:    v_mov_b32_e32 v25, s17
+; GISEL-NEXT:    v_mov_b32_e32 v26, s18
+; GISEL-NEXT:    v_mov_b32_e32 v27, s19
+; GISEL-NEXT:    v_mov_b32_e32 v28, s20
+; GISEL-NEXT:    v_mov_b32_e32 v29, s21
+; GISEL-NEXT:    v_mov_b32_e32 v30, s22
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT:    s_endpgm
   %ptr = load ptr addrspace(1), ptr addrspace(4) poison
   %val = load <32 x i32>, ptr addrspace(1) %ptr
   call void @external_void_func_v32i32(<32 x i32> %val)
@@ -5343,11 +5548,11 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
 ; GISEL-NEXT:    s_mov_b32 s52, SCRATCH_RSRC_DWORD0
 ; GISEL-NEXT:    s_mov_b32 s53, SCRATCH_RSRC_DWORD1
 ; GISEL-NEXT:    s_mov_b32 s54, -1
-; GISEL-NEXT:    s_mov_b32 s55, 0xe00000
 ; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GISEL-NEXT:    s_load_dwordx16 s[8:23], s[0:1], 0x40
 ; GISEL-NEXT:    s_load_dwordx16 s[36:51], s[0:1], 0x0
+; GISEL-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GISEL-NEXT:    s_mov_b32 s55, 0xe00000
 ; GISEL-NEXT:    s_add_u32 s52, s52, s5
 ; GISEL-NEXT:    s_mov_b32 s32, 0
 ; GISEL-NEXT:    s_addc_u32 s53, s53, 0



More information about the llvm-commits mailing list