[llvm] 2959e08 - AMDGPU: Assume all amdhsa kernarg passed implicit arguments by default
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 4 07:38:34 PST 2021
Author: Matt Arsenault
Date: 2021-12-04T10:38:25-05:00
New Revision: 2959e082e1427647e107af0b82770682eaa58fe1
URL: https://github.com/llvm/llvm-project/commit/2959e082e1427647e107af0b82770682eaa58fe1
DIFF: https://github.com/llvm/llvm-project/commit/2959e082e1427647e107af0b82770682eaa58fe1.diff
LOG: AMDGPU: Assume all amdhsa kernarg passed implicit arguments by default
Previously we would require adding an attribute to kernels to enable
the inputs passed in the kernarg segment, accessed by
llvm.amdgcn.implicitarg.ptr. This violates the principle of being
correct by default. Some OpenMP testcases were broken recently since
it wasn't correctly setting this attribute, and no known frontends are
setting this to anything other than the maximum.
Most of the test changes are from load widening of argument loads
since there are now more implied dereferenceable bytes.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll
llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
llvm/test/CodeGen/AMDGPU/cc-update.ll
llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
llvm/test/CodeGen/AMDGPU/indirect-call.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 6947d112653e..47cec39aeb30 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -648,6 +648,8 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
}
unsigned AMDGPUSubtarget::getImplicitArgNumBytes(const Function &F) const {
+ assert(AMDGPU::isKernel(F.getCallingConv()));
+
// We don't allocate the segment if we know the implicit arguments weren't
// used, even if the ABI implies we need them.
if (F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
@@ -655,7 +657,9 @@ unsigned AMDGPUSubtarget::getImplicitArgNumBytes(const Function &F) const {
if (isMesaKernel(F))
return 16;
- return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
+
+ // Assume all implicit inputs are used by default
+ return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 56);
}
uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
index f6c50b537905..6dc2148a5d31 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
@@ -9,34 +9,35 @@ declare hidden void @extern()
define amdgpu_kernel void @kernel_call_no_workitem_ids() {
; CHECK-LABEL: name: kernel_call_no_workitem_ids
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY6]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY7]](p4)
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY8]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY9]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY10]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY11]](s32)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY11]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY13]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
@@ -47,37 +48,38 @@ define amdgpu_kernel void @kernel_call_no_workitem_ids() {
define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
; CHECK-LABEL: name: kernel_call_no_workgroup_ids
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]]
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C2]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C3]](s32)
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY11]], [[SHL]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY6]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY7]](p4)
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY8]](s64)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -89,27 +91,28 @@ define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
define amdgpu_kernel void @kernel_call_no_other_sgprs() {
; CHECK-LABEL: name: kernel_call_no_other_sgprs
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C2]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL]]
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C3]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p4) = COPY [[COPY3]](p4)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY4]], [[C]](s64)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[SHL]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY8]](<4 x s32>)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $vgpr31
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 55449b9cca49..a727c76b975a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -155,51 +155,52 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(i32 addrspace(1)
define amdgpu_kernel void @test_call_external_i1_func_void() #0 {
; GCN-LABEL: name: test_call_external_i1_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i1_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY19]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -234,51 +235,52 @@ define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 {
define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
; GCN-LABEL: name: test_call_external_i1_zeroext_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i1_zeroext_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_zeroext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY19]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1)
; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
@@ -292,51 +294,52 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
; GCN-LABEL: name: test_call_external_i1_signext_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i1_signext_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_signext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY19]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1)
; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
@@ -350,51 +353,52 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
define amdgpu_kernel void @test_call_external_i8_func_void() #0 {
; GCN-LABEL: name: test_call_external_i8_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i8_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
@@ -431,51 +435,52 @@ define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 {
define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
; GCN-LABEL: name: test_call_external_i8_zeroext_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i8_zeroext_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_zeroext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC1]](s8)
@@ -490,51 +495,52 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 {
; GCN-LABEL: name: test_call_external_i8_signext_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i8_signext_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_signext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s8)
@@ -549,51 +555,52 @@ define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 {
define amdgpu_kernel void @test_call_external_i16_func_void() #0 {
; GCN-LABEL: name: test_call_external_i16_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i16_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -605,51 +612,52 @@ define amdgpu_kernel void @test_call_external_i16_func_void() #0 {
define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 {
; GCN-LABEL: name: test_call_external_i16_zeroext_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i16_zeroext_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_zeroext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
@@ -663,51 +671,52 @@ define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 {
define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 {
; GCN-LABEL: name: test_call_external_i16_signext_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i16_signext_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_signext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
@@ -721,52 +730,53 @@ define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 {
define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
%val = call i32 @external_i32_func_void()
store volatile i32 %val, i32 addrspace(1)* undef
@@ -798,52 +808,53 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_i48_func_void() #0 {
; GCN-LABEL: name: test_call_external_i48_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i48_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (volatile store (s48) into `i48 addrspace(1)* undef`, align 8, addrspace 1)
@@ -856,52 +867,53 @@ define amdgpu_kernel void @test_call_external_i48_func_void() #0 {
define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 {
; GCN-LABEL: name: test_call_external_i48_zeroext_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i48_zeroext_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_zeroext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
@@ -916,52 +928,53 @@ define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 {
define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 {
; GCN-LABEL: name: test_call_external_i48_signext_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i48_signext_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_signext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48)
@@ -976,52 +989,53 @@ define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 {
define amdgpu_kernel void @test_call_external_i64_func_void() #0 {
; GCN-LABEL: name: test_call_external_i64_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i64_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -1033,52 +1047,53 @@ define amdgpu_kernel void @test_call_external_i64_func_void() #0 {
define amdgpu_kernel void @test_call_external_p1_func_void() #0 {
; GCN-LABEL: name: test_call_external_p1_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_p1_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_p1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -1090,55 +1105,56 @@ define amdgpu_kernel void @test_call_external_p1_func_void() #0 {
define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 {
; GCN-LABEL: name: test_call_external_v2p1_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2p1_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2p1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
- ; GCN-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
+ ; GCN-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32)
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (volatile store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1)
@@ -1151,52 +1167,53 @@ define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 {
define amdgpu_kernel void @test_call_external_p3_func_void() #0 {
; GCN-LABEL: name: test_call_external_p3_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_p3_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_p3_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(p3) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[COPY19]](p3), [[DEF]](p3) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(3)* undef`, addrspace 3)
+ ; GCN-NEXT: G_STORE [[COPY21]](p3), [[DEF]](p3) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(3)* undef`, addrspace 3)
; GCN-NEXT: S_ENDPGM 0
%val = call i8 addrspace(3)* @external_p3_func_void()
store volatile i8 addrspace(3)* %val, i8 addrspace(3)* addrspace(3)* undef
@@ -1206,52 +1223,53 @@ define amdgpu_kernel void @test_call_external_p3_func_void() #0 {
define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 {
; GCN-LABEL: name: test_call_external_v2p3_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2p3_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2p3_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(p3) = COPY $vgpr1
- ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY19]](p3), [[COPY20]](p3)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(p3) = COPY $vgpr1
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY21]](p3), [[COPY22]](p3)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p3) :: (volatile store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(3)* undef`, addrspace 3)
; GCN-NEXT: S_ENDPGM 0
@@ -1263,51 +1281,52 @@ define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 {
define amdgpu_kernel void @test_call_external_f16_func_void() #0 {
; GCN-LABEL: name: test_call_external_f16_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_f16_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -1319,52 +1338,53 @@ define amdgpu_kernel void @test_call_external_f16_func_void() #0 {
define amdgpu_kernel void @test_call_external_f32_func_void() #0 {
; GCN-LABEL: name: test_call_external_f32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_f32_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `float addrspace(1)* undef`, addrspace 1)
+ ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `float addrspace(1)* undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
%val = call float @external_f32_func_void()
store volatile float %val, float addrspace(1)* undef
@@ -1374,52 +1394,53 @@ define amdgpu_kernel void @test_call_external_f32_func_void() #0 {
define amdgpu_kernel void @test_call_external_f64_func_void() #0 {
; GCN-LABEL: name: test_call_external_f64_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_f64_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `double addrspace(1)* undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -1431,55 +1452,56 @@ define amdgpu_kernel void @test_call_external_f64_func_void() #0 {
define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 {
; GCN-LABEL: name: test_call_external_v2f64_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2f64_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2f64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
- ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
+ ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32)
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (volatile store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1)
@@ -1492,52 +1514,53 @@ define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 {
define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v2i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2i32_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -1549,53 +1572,54 @@ define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v3i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3i32_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `<3 x i32> addrspace(1)* undef`, align 8, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -1607,54 +1631,55 @@ define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v4i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v4i32_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, align 8, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -1666,55 +1691,56 @@ define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v5i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v5i32_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v5i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `<5 x i32> addrspace(1)* undef`, align 8, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -1726,58 +1752,59 @@ define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v8i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v8i32_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v8i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, align 8, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -1789,66 +1816,67 @@ define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v16i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v16i32_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v16i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, align 8, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -1860,82 +1888,83 @@ define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v32i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v32i32_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v32i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr18
- ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr19
- ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr20
- ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(s32) = COPY $vgpr21
- ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(s32) = COPY $vgpr22
- ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(s32) = COPY $vgpr23
- ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY $vgpr24
- ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY $vgpr25
- ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY $vgpr26
- ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY $vgpr27
- ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY $vgpr28
- ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY $vgpr29
- ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(s32) = COPY $vgpr30
- ; GCN-NEXT: [[COPY50:%[0-9]+]]:_(s32) = COPY $vgpr31
- ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32), [[COPY37]](s32), [[COPY38]](s32), [[COPY39]](s32), [[COPY40]](s32), [[COPY41]](s32), [[COPY42]](s32), [[COPY43]](s32), [[COPY44]](s32), [[COPY45]](s32), [[COPY46]](s32), [[COPY47]](s32), [[COPY48]](s32), [[COPY49]](s32), [[COPY50]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; GCN-NEXT: [[COPY50:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; GCN-NEXT: [[COPY51:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; GCN-NEXT: [[COPY52:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32), [[COPY37]](s32), [[COPY38]](s32), [[COPY39]](s32), [[COPY40]](s32), [[COPY41]](s32), [[COPY42]](s32), [[COPY43]](s32), [[COPY44]](s32), [[COPY45]](s32), [[COPY46]](s32), [[COPY47]](s32), [[COPY48]](s32), [[COPY49]](s32), [[COPY50]](s32), [[COPY51]](s32), [[COPY52]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -1947,52 +1976,53 @@ define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 {
; GCN-LABEL: name: test_call_external_v2i16_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2i16_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[COPY19]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1)
+ ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
%val = call <2 x i16> @external_v2i16_func_void()
store volatile <2 x i16> %val, <2 x i16> addrspace(1)* undef
@@ -2002,53 +2032,54 @@ define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 {
define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 {
; GCN-LABEL: name: test_call_external_v3i16_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3i16_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[DEF1]](<2 x s16>)
+ ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[DEF1]](<2 x s16>)
; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
@@ -2061,52 +2092,53 @@ define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 {
define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 {
; GCN-LABEL: name: test_call_external_v4i16_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v4i16_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `<4 x i16> addrspace(1)* undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -2118,52 +2150,53 @@ define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 {
define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 {
; GCN-LABEL: name: test_call_external_v2f16_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2f16_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[COPY19]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1)
+ ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
%val = call <2 x half> @external_v2f16_func_void()
store volatile <2 x half> %val, <2 x half> addrspace(1)* undef
@@ -2173,53 +2206,54 @@ define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 {
define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 {
; GCN-LABEL: name: test_call_external_v3f16_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3f16_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[DEF1]](<2 x s16>)
+ ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[DEF1]](<2 x s16>)
; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
@@ -2232,52 +2266,53 @@ define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 {
define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 {
; GCN-LABEL: name: test_call_external_v4f16_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v4f16_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `<4 x half> addrspace(1)* undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -2289,53 +2324,54 @@ define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 {
define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v3f32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3f32_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -2347,55 +2383,56 @@ define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 {
define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v5f32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v5f32_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v5f32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `<5 x float> addrspace(1)* undef`, align 32, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
@@ -2408,57 +2445,58 @@ define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 {
define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 {
; GCN-LABEL: name: test_call_external_i32_i64_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_i64_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64)
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]]
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_i64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[COPY20]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; GCN-NEXT: G_STORE [[MV]](s64), [[COPY9]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
+ ; GCN-NEXT: G_STORE [[COPY22]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; GCN-NEXT: G_STORE [[MV]](s64), [[COPY10]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
%val = call { i32, i64 } @external_i32_i64_func_void()
%val.0 = extractvalue { i32, i64 } %val, 0
@@ -2501,54 +2539,55 @@ define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 {
define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_a2i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_a2i32_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_a2i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; GCN-NEXT: G_STORE [[COPY20]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; GCN-NEXT: G_STORE [[COPY22]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
%val = call [2 x i32] @external_a2i32_func_void()
%val.0 = extractvalue [2 x i32] %val, 0
@@ -2561,63 +2600,64 @@ define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 {
; GCN-LABEL: name: test_call_external_a5i8_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_a5i8_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_a5i8_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32)
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32)
; GCN-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16)
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32)
; GCN-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16)
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32)
+ ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32)
; GCN-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16)
- ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GCN-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32)
+ ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GCN-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY25]](s32)
; GCN-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
@@ -2643,59 +2683,60 @@ define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 {
define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v32i32_i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v32i32_i32_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64)
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]]
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5)
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5)
- ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
- ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32)
+ ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+ ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32)
; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5)
; GCN-NEXT: G_STORE [[LOAD]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1)
- ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[COPY9]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[COPY10]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
%val = call { <32 x i32>, i32 } @external_v32i32_i32_func_void()
%val0 = extractvalue { <32 x i32>, i32 } %val, 0
@@ -2708,59 +2749,60 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_i32_v32i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_v32i32_func_void
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64)
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]]
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5)
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_v32i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5)
- ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
- ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32)
+ ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+ ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32)
; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5)
; GCN-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; GCN-NEXT: G_STORE [[LOAD1]](<32 x s32>), [[COPY9]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1)
+ ; GCN-NEXT: G_STORE [[LOAD1]](<32 x s32>), [[COPY10]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
%val = call { i32, <32 x i32> } @external_i32_v32i32_func_void()
%val0 = extractvalue { i32, <32 x i32> } %val, 0
@@ -2773,49 +2815,50 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 {
define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 {
; GCN-LABEL: name: test_call_external_v33i32_func_void
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v33i32_func_void
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+ ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+ ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll
index 661458a86483..ae04ab5ca9a7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll
@@ -126,6 +126,6 @@ declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+attributes #1 = { nounwind "amdgpu-implicitarg-num-bytes"="0" }
attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }
attributes #3 = { nounwind "amdgpu-implicitarg-num-bytes"="38" }
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
index d4052cb8db02..dc521bce2014 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
@@ -283,7 +283,7 @@ define amdgpu_kernel void @kern_indirect_use_every_sgpr_input(i8) #1 {
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
; GCN: .amdhsa_system_sgpr_workgroup_info 0
; GCN: .amdhsa_system_vgpr_workitem_id 2
-define amdgpu_kernel void @kern_indirect_use_every_sgpr_input_no_kernargs() #1 {
+define amdgpu_kernel void @kern_indirect_use_every_sgpr_input_no_kernargs() #2 {
call void @use_every_sgpr_input()
ret void
}
@@ -361,3 +361,4 @@ declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind noinline }
+attributes #2 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="0" }
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
index a6ba6a16223f..d4da01688162 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
@@ -478,7 +478,7 @@ define amdgpu_kernel void @kern_indirect_use_every_sgpr_input(i8) #1 {
; GCN: s_mov_b64 s[8:9], 0{{$}}
; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_indirect_use_every_sgpr_input_no_kernargs() #1 {
+define amdgpu_kernel void @kern_indirect_use_every_sgpr_input_no_kernargs() #2 {
call void @use_every_sgpr_input()
ret void
}
@@ -613,3 +613,4 @@ declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind noinline }
+attributes #2 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="0" }
diff --git a/llvm/test/CodeGen/AMDGPU/cc-update.ll b/llvm/test/CodeGen/AMDGPU/cc-update.ll
index 50f683c8c495..ca239dec4511 100644
--- a/llvm/test/CodeGen/AMDGPU/cc-update.ll
+++ b/llvm/test/CodeGen/AMDGPU/cc-update.ll
@@ -55,12 +55,12 @@ entry:
define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 {
; GFX803-LABEL: test_kern_call:
; GFX803: ; %bb.0: ; %entry
-; GFX803-NEXT: s_add_i32 s10, s10, s15
-; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
-; GFX803-NEXT: s_add_u32 s0, s0, s15
+; GFX803-NEXT: s_add_i32 s12, s12, s17
+; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GFX803-NEXT: s_add_u32 s0, s0, s17
; GFX803-NEXT: s_addc_u32 s1, s1, 0
; GFX803-NEXT: s_mov_b32 s32, 0
-; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s11
+; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s13
; GFX803-NEXT: s_getpc_b64 s[4:5]
; GFX803-NEXT: s_add_u32 s4, s4, ex@rel32@lo+4
; GFX803-NEXT: s_addc_u32 s5, s5, ex@rel32@hi+12
@@ -69,9 +69,9 @@ define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 {
;
; GFX900-LABEL: test_kern_call:
; GFX900: ; %bb.0: ; %entry
-; GFX900-NEXT: s_add_u32 flat_scratch_lo, s10, s15
-; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s11, 0
-; GFX900-NEXT: s_add_u32 s0, s0, s15
+; GFX900-NEXT: s_add_u32 flat_scratch_lo, s12, s17
+; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
+; GFX900-NEXT: s_add_u32 s0, s0, s17
; GFX900-NEXT: s_addc_u32 s1, s1, 0
; GFX900-NEXT: s_mov_b32 s32, 0
; GFX900-NEXT: s_getpc_b64 s[4:5]
@@ -82,12 +82,12 @@ define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 {
;
; GFX1010-LABEL: test_kern_call:
; GFX1010: ; %bb.0: ; %entry
-; GFX1010-NEXT: s_add_u32 s10, s10, s15
+; GFX1010-NEXT: s_add_u32 s12, s12, s17
; GFX1010-NEXT: s_mov_b32 s32, 0
-; GFX1010-NEXT: s_addc_u32 s11, s11, 0
-; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
-; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
-; GFX1010-NEXT: s_add_u32 s0, s0, s15
+; GFX1010-NEXT: s_addc_u32 s13, s13, 0
+; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
+; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
+; GFX1010-NEXT: s_add_u32 s0, s0, s17
; GFX1010-NEXT: s_addc_u32 s1, s1, 0
; GFX1010-NEXT: s_getpc_b64 s[4:5]
; GFX1010-NEXT: s_add_u32 s4, s4, ex@rel32@lo+4
@@ -102,13 +102,13 @@ entry:
define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
; GFX803-LABEL: test_kern_stack_and_call:
; GFX803: ; %bb.0: ; %entry
-; GFX803-NEXT: s_add_i32 s10, s10, s15
-; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
-; GFX803-NEXT: s_add_u32 s0, s0, s15
+; GFX803-NEXT: s_add_i32 s12, s12, s17
+; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GFX803-NEXT: s_add_u32 s0, s0, s17
; GFX803-NEXT: s_addc_u32 s1, s1, 0
; GFX803-NEXT: v_mov_b32_e32 v0, 0
; GFX803-NEXT: s_movk_i32 s32, 0x400
-; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s11
+; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s13
; GFX803-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: s_getpc_b64 s[4:5]
@@ -119,9 +119,9 @@ define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
;
; GFX900-LABEL: test_kern_stack_and_call:
; GFX900: ; %bb.0: ; %entry
-; GFX900-NEXT: s_add_u32 flat_scratch_lo, s10, s15
-; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s11, 0
-; GFX900-NEXT: s_add_u32 s0, s0, s15
+; GFX900-NEXT: s_add_u32 flat_scratch_lo, s12, s17
+; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
+; GFX900-NEXT: s_add_u32 s0, s0, s17
; GFX900-NEXT: s_addc_u32 s1, s1, 0
; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: s_movk_i32 s32, 0x400
@@ -135,13 +135,13 @@ define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
;
; GFX1010-LABEL: test_kern_stack_and_call:
; GFX1010: ; %bb.0: ; %entry
-; GFX1010-NEXT: s_add_u32 s10, s10, s15
+; GFX1010-NEXT: s_add_u32 s12, s12, s17
; GFX1010-NEXT: s_movk_i32 s32, 0x200
-; GFX1010-NEXT: s_addc_u32 s11, s11, 0
-; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
-; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
+; GFX1010-NEXT: s_addc_u32 s13, s13, 0
+; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
+; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; GFX1010-NEXT: v_mov_b32_e32 v0, 0
-; GFX1010-NEXT: s_add_u32 s0, s0, s15
+; GFX1010-NEXT: s_add_u32 s0, s0, s17
; GFX1010-NEXT: s_addc_u32 s1, s1, 0
; GFX1010-NEXT: s_getpc_b64 s[4:5]
; GFX1010-NEXT: s_add_u32 s4, s4, ex@rel32@lo+4
@@ -215,13 +215,13 @@ entry:
define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
; GFX803-LABEL: test_force_fp_kern_call:
; GFX803: ; %bb.0: ; %entry
-; GFX803-NEXT: s_add_i32 s10, s10, s15
-; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
-; GFX803-NEXT: s_add_u32 s0, s0, s15
+; GFX803-NEXT: s_add_i32 s12, s12, s17
+; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GFX803-NEXT: s_add_u32 s0, s0, s17
; GFX803-NEXT: s_addc_u32 s1, s1, 0
; GFX803-NEXT: s_mov_b32 s32, 0
; GFX803-NEXT: s_mov_b32 s33, 0
-; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s11
+; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s13
; GFX803-NEXT: s_getpc_b64 s[4:5]
; GFX803-NEXT: s_add_u32 s4, s4, ex@rel32@lo+4
; GFX803-NEXT: s_addc_u32 s5, s5, ex@rel32@hi+12
@@ -230,9 +230,9 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
;
; GFX900-LABEL: test_force_fp_kern_call:
; GFX900: ; %bb.0: ; %entry
-; GFX900-NEXT: s_add_u32 flat_scratch_lo, s10, s15
-; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s11, 0
-; GFX900-NEXT: s_add_u32 s0, s0, s15
+; GFX900-NEXT: s_add_u32 flat_scratch_lo, s12, s17
+; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
+; GFX900-NEXT: s_add_u32 s0, s0, s17
; GFX900-NEXT: s_addc_u32 s1, s1, 0
; GFX900-NEXT: s_mov_b32 s32, 0
; GFX900-NEXT: s_mov_b32 s33, 0
@@ -244,13 +244,13 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
;
; GFX1010-LABEL: test_force_fp_kern_call:
; GFX1010: ; %bb.0: ; %entry
-; GFX1010-NEXT: s_add_u32 s10, s10, s15
+; GFX1010-NEXT: s_add_u32 s12, s12, s17
; GFX1010-NEXT: s_mov_b32 s32, 0
; GFX1010-NEXT: s_mov_b32 s33, 0
-; GFX1010-NEXT: s_addc_u32 s11, s11, 0
-; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
-; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
-; GFX1010-NEXT: s_add_u32 s0, s0, s15
+; GFX1010-NEXT: s_addc_u32 s13, s13, 0
+; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
+; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
+; GFX1010-NEXT: s_add_u32 s0, s0, s17
; GFX1010-NEXT: s_addc_u32 s1, s1, 0
; GFX1010-NEXT: s_getpc_b64 s[4:5]
; GFX1010-NEXT: s_add_u32 s4, s4, ex@rel32@lo+4
@@ -265,14 +265,14 @@ entry:
define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_addr #2 {
; GFX803-LABEL: test_force_fp_kern_stack_and_call:
; GFX803: ; %bb.0: ; %entry
-; GFX803-NEXT: s_add_i32 s10, s10, s15
-; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
-; GFX803-NEXT: s_add_u32 s0, s0, s15
+; GFX803-NEXT: s_add_i32 s12, s12, s17
+; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GFX803-NEXT: s_add_u32 s0, s0, s17
; GFX803-NEXT: s_mov_b32 s33, 0
; GFX803-NEXT: s_addc_u32 s1, s1, 0
; GFX803-NEXT: v_mov_b32_e32 v0, 0
; GFX803-NEXT: s_movk_i32 s32, 0x400
-; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s11
+; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s13
; GFX803-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: s_getpc_b64 s[4:5]
@@ -283,9 +283,9 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
;
; GFX900-LABEL: test_force_fp_kern_stack_and_call:
; GFX900: ; %bb.0: ; %entry
-; GFX900-NEXT: s_add_u32 flat_scratch_lo, s10, s15
-; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s11, 0
-; GFX900-NEXT: s_add_u32 s0, s0, s15
+; GFX900-NEXT: s_add_u32 flat_scratch_lo, s12, s17
+; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
+; GFX900-NEXT: s_add_u32 s0, s0, s17
; GFX900-NEXT: s_mov_b32 s33, 0
; GFX900-NEXT: s_addc_u32 s1, s1, 0
; GFX900-NEXT: v_mov_b32_e32 v0, 0
@@ -300,14 +300,14 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
;
; GFX1010-LABEL: test_force_fp_kern_stack_and_call:
; GFX1010: ; %bb.0: ; %entry
-; GFX1010-NEXT: s_add_u32 s10, s10, s15
+; GFX1010-NEXT: s_add_u32 s12, s12, s17
; GFX1010-NEXT: s_movk_i32 s32, 0x200
; GFX1010-NEXT: s_mov_b32 s33, 0
-; GFX1010-NEXT: s_addc_u32 s11, s11, 0
-; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
-; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
+; GFX1010-NEXT: s_addc_u32 s13, s13, 0
+; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
+; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; GFX1010-NEXT: v_mov_b32_e32 v0, 0
-; GFX1010-NEXT: s_add_u32 s0, s0, s15
+; GFX1010-NEXT: s_add_u32 s0, s0, s17
; GFX1010-NEXT: s_addc_u32 s1, s1, 0
; GFX1010-NEXT: s_getpc_b64 s[4:5]
; GFX1010-NEXT: s_add_u32 s4, s4, ex@rel32@lo+4
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
index bfd7f7624b88..0f38b3470a99 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
@@ -68,11 +68,11 @@ define amdgpu_kernel void @stack_object_in_kernel_no_calls() {
define amdgpu_kernel void @kernel_calls_no_stack() {
; FLAT_SCR_OPT-LABEL: kernel_calls_no_stack:
; FLAT_SCR_OPT: ; %bb.0:
-; FLAT_SCR_OPT-NEXT: s_add_u32 s6, s6, s11
+; FLAT_SCR_OPT-NEXT: s_add_u32 s8, s8, s13
; FLAT_SCR_OPT-NEXT: s_mov_b32 s32, 0
-; FLAT_SCR_OPT-NEXT: s_addc_u32 s7, s7, 0
-; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
-; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
+; FLAT_SCR_OPT-NEXT: s_addc_u32 s9, s9, 0
+; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
+; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
; FLAT_SCR_OPT-NEXT: s_getpc_b64 s[0:1]
; FLAT_SCR_OPT-NEXT: s_add_u32 s0, s0, extern_func@gotpcrel32@lo+4
; FLAT_SCR_OPT-NEXT: s_addc_u32 s1, s1, extern_func@gotpcrel32@hi+12
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
index 13afcc8a5c1d..8a1066fb7d02 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
@@ -58,7 +58,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) {
; GCN-NEXT: workitem_private_segment_byte_size = 16384
; GCN-NEXT: workgroup_group_segment_byte_size = 0
; GCN-NEXT: gds_segment_byte_size = 0
-; GCN-NEXT: kernarg_segment_byte_size = 4
+; GCN-NEXT: kernarg_segment_byte_size = 64
; GCN-NEXT: workgroup_fbarrier_count = 0
; GCN-NEXT: wavefront_sgpr_count = 37
; GCN-NEXT: workitem_vgpr_count = 32
@@ -151,7 +151,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) {
; GISEL-NEXT: workitem_private_segment_byte_size = 16384
; GISEL-NEXT: workgroup_group_segment_byte_size = 0
; GISEL-NEXT: gds_segment_byte_size = 0
-; GISEL-NEXT: kernarg_segment_byte_size = 4
+; GISEL-NEXT: kernarg_segment_byte_size = 64
; GISEL-NEXT: workgroup_fbarrier_count = 0
; GISEL-NEXT: wavefront_sgpr_count = 37
; GISEL-NEXT: workitem_vgpr_count = 32
@@ -249,7 +249,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) {
; GCN-NEXT: workitem_private_segment_byte_size = 16384
; GCN-NEXT: workgroup_group_segment_byte_size = 0
; GCN-NEXT: gds_segment_byte_size = 0
-; GCN-NEXT: kernarg_segment_byte_size = 4
+; GCN-NEXT: kernarg_segment_byte_size = 64
; GCN-NEXT: workgroup_fbarrier_count = 0
; GCN-NEXT: wavefront_sgpr_count = 37
; GCN-NEXT: workitem_vgpr_count = 32
@@ -343,7 +343,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) {
; GISEL-NEXT: workitem_private_segment_byte_size = 16384
; GISEL-NEXT: workgroup_group_segment_byte_size = 0
; GISEL-NEXT: gds_segment_byte_size = 0
-; GISEL-NEXT: kernarg_segment_byte_size = 4
+; GISEL-NEXT: kernarg_segment_byte_size = 64
; GISEL-NEXT: workgroup_fbarrier_count = 0
; GISEL-NEXT: wavefront_sgpr_count = 37
; GISEL-NEXT: workitem_vgpr_count = 32
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
index e3ad5493b5c1..4ecc849961ad 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
@@ -2,16 +2,15 @@
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MESA %s
; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty:
-; HSA: enable_sgpr_kernarg_segment_ptr = 0
-; HSA: kernarg_segment_byte_size = 0
+; HSA: enable_sgpr_kernarg_segment_ptr = 1
+; HSA: kernarg_segment_byte_size = 56
; HSA: kernarg_segment_alignment = 4
; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4
-; HSA: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0{{$}}
-; HSA: s_load_dword s0, [[NULL]], 0x0
+; HSA: s_load_dword s0, s[4:5], 0x0
define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
@@ -59,7 +58,7 @@ define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() #1 {
; GCN-LABEL: {{^}}kernel_implicitarg_ptr:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 112
+; HSA: kernarg_segment_byte_size = 168
; HSA: kernarg_segment_alignment = 4
; MESA: kernarg_segment_byte_size = 128
@@ -115,17 +114,17 @@ define void @opencl_func_implicitarg_ptr() #0 {
}
; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func_empty:
-; HSA: enable_sgpr_kernarg_segment_ptr = 0
-; HSA: kernarg_segment_byte_size = 0
+; HSA: enable_sgpr_kernarg_segment_ptr = 1
+; HSA: kernarg_segment_byte_size = 56
; HSA: kernarg_segment_alignment = 4
; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4
-; XGCN-NOT: s[4:5]
-; XGCN-NOT: s4
-; XGCN-NOT: s5
+; GCN-NOT: s[4:5]
+; GCN-NOT: s4
+; GCN-NOT: s5
; GCN: s_swappc_b64
define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
call void @func_implicitarg_ptr()
@@ -168,8 +167,9 @@ define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func_empty() #1 {
; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 112
+; HSA: kernarg_segment_byte_size = 168
; HSA: kernarg_segment_alignment = 4
+
; MESA: kernarg_segment_byte_size = 128
; MESA: kernarg_segment_alignment = 4
@@ -272,8 +272,8 @@ define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>
; HSA-LABEL: Kernels:
; HSA-LABEL: - Name: kernel_implicitarg_ptr_empty
; HSA: CodeProps:
-; HSA: KernargSegmentSize: 0
-; HSA: KernargSegmentAlign: 4
+; HSA: KernargSegmentSize: 56
+; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: kernel_implicitarg_ptr_empty_0implicit
; HSA: KernargSegmentSize: 0
@@ -284,16 +284,16 @@ define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>
; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: kernel_implicitarg_ptr
-; HSA: KernargSegmentSize: 112
-; HSA: KernargSegmentAlign: 4
+; HSA: KernargSegmentSize: 168
+; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: opencl_kernel_implicitarg_ptr
; HSA: KernargSegmentSize: 160
; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: kernel_call_implicitarg_ptr_func_empty
-; HSA: KernargSegmentSize: 0
-; HSA: KernargSegmentAlign: 4
+; HSA: KernargSegmentSize: 56
+; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: kernel_call_implicitarg_ptr_func_empty_implicit0
; HSA: KernargSegmentSize: 0
@@ -304,16 +304,16 @@ define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>
; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: kernel_call_implicitarg_ptr_func
-; HSA: KernargSegmentSize: 112
-; HSA: KernargSegmentAlign: 4
+; HSA: KernargSegmentSize: 168
+; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: opencl_kernel_call_implicitarg_ptr_func
; HSA: KernargSegmentSize: 160
; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: kernel_call_kernarg_implicitarg_ptr_func
-; HSA: KernargSegmentSize: 112
-; HSA: KernargSegmentAlign: 4
+; HSA: KernargSegmentSize: 168
+; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: kernel_implicitarg_no_struct_align_padding
; HSA: KernargSegmentSize: 120
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
index ecd14b4409cc..3ae0f77881d8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
@@ -20,7 +20,7 @@ define amdgpu_kernel void @test(i32 addrspace(1)* %out) #1 {
}
; ALL-LABEL: {{^}}test_implicit:
-; HSA: kernarg_segment_byte_size = 8
+; HSA: kernarg_segment_byte_size = 64
; OS-MESA3D: kernarg_segment_byte_size = 24
; CO-V2: kernarg_segment_alignment = 4
@@ -36,7 +36,7 @@ define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 {
}
; ALL-LABEL: {{^}}test_implicit_alignment:
-; HSA: kernarg_segment_byte_size = 12
+; HSA: kernarg_segment_byte_size = 72
; OS-MESA3D: kernarg_segment_byte_size = 28
; CO-V2: kernarg_segment_alignment = 4
diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
index 0f01b536d6fe..53020b0080b2 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
@@ -17,7 +17,7 @@ define amdgpu_kernel void @kern_noargs() {
define amdgpu_kernel void @kern_i8(i8 %arg) #0 {
; HSA-LABEL: @kern_i8(
-; HSA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -26,7 +26,7 @@ define amdgpu_kernel void @kern_i8(i8 %arg) #0 {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_i8(
-; MESA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -40,7 +40,7 @@ define amdgpu_kernel void @kern_i8(i8 %arg) #0 {
define amdgpu_kernel void @kern_i16(i16 %arg) #0 {
; HSA-LABEL: @kern_i16(
-; HSA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -49,7 +49,7 @@ define amdgpu_kernel void @kern_i16(i16 %arg) #0 {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_i16(
-; MESA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -63,7 +63,7 @@ define amdgpu_kernel void @kern_i16(i16 %arg) #0 {
define amdgpu_kernel void @kern_f16(half %arg) #0 {
; HSA-LABEL: @kern_f16(
-; HSA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -73,7 +73,7 @@ define amdgpu_kernel void @kern_f16(half %arg) #0 {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_f16(
-; MESA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -88,7 +88,7 @@ define amdgpu_kernel void @kern_f16(half %arg) #0 {
define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 {
; HSA-LABEL: @kern_zeroext_i8(
-; HSA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -97,7 +97,7 @@ define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_zeroext_i8(
-; MESA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -111,7 +111,7 @@ define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 {
define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 {
; HSA-LABEL: @kern_zeroext_i16(
-; HSA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -120,7 +120,7 @@ define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_zeroext_i16(
-; MESA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -134,7 +134,7 @@ define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 {
define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 {
; HSA-LABEL: @kern_signext_i8(
-; HSA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -143,7 +143,7 @@ define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_signext_i8(
-; MESA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -157,7 +157,7 @@ define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 {
define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 {
; HSA-LABEL: @kern_signext_i16(
-; HSA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -166,7 +166,7 @@ define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_signext_i16(
-; MESA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -180,7 +180,7 @@ define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 {
define amdgpu_kernel void @kern_i8_i8(i8 %arg0, i8 %arg1) {
; HSA-LABEL: @kern_i8_i8(
-; HSA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -195,7 +195,7 @@ define amdgpu_kernel void @kern_i8_i8(i8 %arg0, i8 %arg1) {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_i8_i8(
-; MESA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -216,7 +216,7 @@ define amdgpu_kernel void @kern_i8_i8(i8 %arg0, i8 %arg1) {
define amdgpu_kernel void @kern_v3i8(<3 x i8> %arg) {
; HSA-LABEL: @kern_v3i8(
-; HSA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -226,7 +226,7 @@ define amdgpu_kernel void @kern_v3i8(<3 x i8> %arg) {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_v3i8(
-; MESA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -241,7 +241,7 @@ define amdgpu_kernel void @kern_v3i8(<3 x i8> %arg) {
define amdgpu_kernel void @kern_i24(i24 %arg0) {
; HSA-LABEL: @kern_i24(
-; HSA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I24_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -250,7 +250,7 @@ define amdgpu_kernel void @kern_i24(i24 %arg0) {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_i24(
-; MESA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I24_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -264,7 +264,7 @@ define amdgpu_kernel void @kern_i24(i24 %arg0) {
define amdgpu_kernel void @kern_i32(i32 %arg0) {
; HSA-LABEL: @kern_i32(
-; HSA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -272,7 +272,7 @@ define amdgpu_kernel void @kern_i32(i32 %arg0) {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_i32(
-; MESA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -285,7 +285,7 @@ define amdgpu_kernel void @kern_i32(i32 %arg0) {
define amdgpu_kernel void @kern_f32(float %arg0) {
; HSA-LABEL: @kern_f32(
-; HSA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to float addrspace(4)*
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load float, float addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -293,7 +293,7 @@ define amdgpu_kernel void @kern_f32(float %arg0) {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_f32(
-; MESA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to float addrspace(4)*
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load float, float addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -306,7 +306,7 @@ define amdgpu_kernel void @kern_f32(float %arg0) {
define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) {
; HSA-LABEL: @kern_v3i32(
-; HSA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(16) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to <4 x i32> addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -315,7 +315,7 @@ define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_v3i32(
-; MESA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(52) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to <4 x i32> addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -329,7 +329,7 @@ define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) {
define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 {
; HSA-LABEL: @kern_v8i32(
-; HSA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(88) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i32> addrspace(4)*
; HSA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -337,7 +337,7 @@ define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_v8i32(
-; MESA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(88) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i32> addrspace(4)*
; MESA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -350,7 +350,7 @@ define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 {
define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 {
; HSA-LABEL: @kern_v8i64(
-; HSA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(120) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I64_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i64> addrspace(4)*
; HSA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i64>, <8 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -358,7 +358,7 @@ define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_v8i64(
-; MESA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(100) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(120) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I64_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i64> addrspace(4)*
; MESA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i64>, <8 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -371,7 +371,7 @@ define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 {
define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 {
; HSA-LABEL: @kern_v16i64(
-; HSA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(128) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(184) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V16I64_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <16 x i64> addrspace(4)*
; HSA-NEXT: [[ARG_LOAD:%.*]] = load <16 x i64>, <16 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -379,7 +379,7 @@ define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_v16i64(
-; MESA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(164) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(184) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V16I64_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <16 x i64> addrspace(4)*
; MESA-NEXT: [[ARG_LOAD:%.*]] = load <16 x i64>, <16 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -392,7 +392,7 @@ define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 {
define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) {
; HSA-LABEL: @kern_i32_v3i32(
-; HSA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(88) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -405,7 +405,7 @@ define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_i32_v3i32(
-; MESA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(88) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -427,7 +427,7 @@ define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) {
define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) {
; HSA-LABEL: @kern_struct_a(
-; HSA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(12) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)*
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -435,7 +435,7 @@ define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_struct_a(
-; MESA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)*
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -448,7 +448,7 @@ define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) {
define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 {
; HSA-LABEL: @kern_struct_b_packed(
-; HSA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(88) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)*
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -456,7 +456,7 @@ define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_struct_b_packed(
-; MESA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(88) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)*
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -511,7 +511,7 @@ define amdgpu_kernel void @kernel_implicitarg_no_struct_align(<16 x i32>, i32 %a
define amdgpu_kernel void @kern_lds_ptr(i32 addrspace(3)* %lds) #0 {
; HSA-LABEL: @kern_lds_ptr(
-; HSA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)*
; HSA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -519,7 +519,7 @@ define amdgpu_kernel void @kern_lds_ptr(i32 addrspace(3)* %lds) #0 {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_lds_ptr(
-; MESA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)*
; MESA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -532,12 +532,12 @@ define amdgpu_kernel void @kern_lds_ptr(i32 addrspace(3)* %lds) #0 {
define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 {
; HSA-LABEL: @kern_lds_ptr_si(
-; HSA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_lds_ptr_si(
-; MESA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4
; MESA-NEXT: ret void
;
@@ -547,7 +547,7 @@ define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 {
define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 {
; HSA-LABEL: @kern_realign_i8_i8(
-; HSA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -562,7 +562,7 @@ define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_realign_i8_i8(
-; MESA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -583,7 +583,7 @@ define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 {
define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) #0 {
; HSA-LABEL: @kern_realign_i8_i8_i8(
-; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -604,7 +604,7 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) #
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_realign_i8_i8_i8(
-; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -632,7 +632,7 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) #
define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) #0 {
; HSA-LABEL: @kern_realign_i8_i8_i8_i8(
-; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -659,7 +659,7 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_realign_i8_i8_i8_i8(
-; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -694,7 +694,7 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2
define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 {
; HSA-LABEL: @kern_realign_i8_v3i8(
-; HSA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -709,7 +709,7 @@ define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_realign_i8_v3i8(
-; MESA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -730,7 +730,7 @@ define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 {
define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 {
; HSA-LABEL: @kern_realign_i8_i16(
-; HSA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -745,7 +745,7 @@ define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_realign_i8_i16(
-; MESA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -766,7 +766,7 @@ define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 {
define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 {
; HSA-LABEL: @kern_realign_i1_i1(
-; HSA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -781,7 +781,7 @@ define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_realign_i1_i1(
-; MESA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -802,7 +802,7 @@ define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 {
define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) #0 {
; HSA-LABEL: @kern_realign_i1_i1_i1(
-; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -823,7 +823,7 @@ define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) #
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_realign_i1_i1_i1(
-; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -851,7 +851,7 @@ define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) #
define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2, i1 %arg3) #0 {
; HSA-LABEL: @kern_realign_i1_i1_i1_i1(
-; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -878,7 +878,7 @@ define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_realign_i1_i1_i1_i1(
-; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -913,7 +913,7 @@ define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2
define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 {
; HSA-LABEL: @kern_realign_i1_v3i1(
-; HSA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -929,7 +929,7 @@ define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_realign_i1_v3i1(
-; MESA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -951,7 +951,7 @@ define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 {
define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 {
; HSA-LABEL: @kern_realign_i1_i16(
-; HSA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -966,7 +966,7 @@ define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_realign_i1_i16(
-; MESA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -987,7 +987,7 @@ define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 {
define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 %arg6, i8 %arg7) #0 {
; HSA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(
-; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -1032,7 +1032,7 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %ar
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(
-; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -1088,7 +1088,7 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %ar
define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 {
; HSA-LABEL: @kern_realign_f16_f16(
-; HSA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -1105,7 +1105,7 @@ define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_realign_f16_f16(
-; MESA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -1128,7 +1128,7 @@ define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 {
define amdgpu_kernel void @kern_global_ptr(i8 addrspace(1)* %ptr) #0 {
; HSA-LABEL: @kern_global_ptr(
-; HSA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1136,7 +1136,7 @@ define amdgpu_kernel void @kern_global_ptr(i8 addrspace(1)* %ptr) #0 {
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_global_ptr(
-; MESA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1149,7 +1149,7 @@ define amdgpu_kernel void @kern_global_ptr(i8 addrspace(1)* %ptr) #0 {
define amdgpu_kernel void @kern_global_ptr_dereferencable(i8 addrspace(1)* dereferenceable(42) %ptr) #0 {
; HSA-LABEL: @kern_global_ptr_dereferencable(
-; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !dereferenceable !1
@@ -1157,7 +1157,7 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable(i8 addrspace(1)* deref
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_global_ptr_dereferencable(
-; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable !1
@@ -1170,7 +1170,7 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable(i8 addrspace(1)* deref
define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(i8 addrspace(1)* dereferenceable_or_null(128) %ptr) #0 {
; HSA-LABEL: @kern_global_ptr_dereferencable_or_null(
-; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !dereferenceable_or_null !2
@@ -1178,7 +1178,7 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(i8 addrspace(1
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_global_ptr_dereferencable_or_null(
-; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable_or_null !2
@@ -1191,7 +1191,7 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(i8 addrspace(1
define amdgpu_kernel void @kern_nonnull_global_ptr(i8 addrspace(1)* nonnull %ptr) #0 {
; HSA-LABEL: @kern_nonnull_global_ptr(
-; HSA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !nonnull !0
@@ -1199,7 +1199,7 @@ define amdgpu_kernel void @kern_nonnull_global_ptr(i8 addrspace(1)* nonnull %ptr
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_nonnull_global_ptr(
-; MESA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !nonnull !0
@@ -1212,7 +1212,7 @@ define amdgpu_kernel void @kern_nonnull_global_ptr(i8 addrspace(1)* nonnull %ptr
define amdgpu_kernel void @kern_align32_global_ptr(i8 addrspace(1)* align 1024 %ptr) #0 {
; HSA-LABEL: @kern_align32_global_ptr(
-; HSA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !align !3
@@ -1220,7 +1220,7 @@ define amdgpu_kernel void @kern_align32_global_ptr(i8 addrspace(1)* align 1024 %
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_align32_global_ptr(
-; MESA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !align !3
@@ -1233,12 +1233,12 @@ define amdgpu_kernel void @kern_align32_global_ptr(i8 addrspace(1)* align 1024 %
define amdgpu_kernel void @kern_noalias_global_ptr(i8 addrspace(1)* noalias %ptr) #0 {
; HSA-LABEL: @kern_noalias_global_ptr(
-; HSA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_noalias_global_ptr(
-; MESA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8
; MESA-NEXT: ret void
;
@@ -1248,13 +1248,13 @@ define amdgpu_kernel void @kern_noalias_global_ptr(i8 addrspace(1)* noalias %ptr
define amdgpu_kernel void @kern_noalias_global_ptr_x2(i8 addrspace(1)* noalias %ptr0, i8 addrspace(1)* noalias %ptr1) #0 {
; HSA-LABEL: @kern_noalias_global_ptr_x2(
-; HSA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(16) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8
; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_noalias_global_ptr_x2(
-; MESA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(52) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8
; MESA-NEXT: ret void
@@ -1267,7 +1267,7 @@ define amdgpu_kernel void @kern_noalias_global_ptr_x2(i8 addrspace(1)* noalias %
define amdgpu_kernel void @struct_i8_i8_arg({i8, i8} %in) #0 {
; HSA-LABEL: @struct_i8_i8_arg(
; HSA-NEXT: entry:
-; HSA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i8 } addrspace(4)*
; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, { i8, i8 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1279,7 +1279,7 @@ define amdgpu_kernel void @struct_i8_i8_arg({i8, i8} %in) #0 {
;
; MESA-LABEL: @struct_i8_i8_arg(
; MESA-NEXT: entry:
-; MESA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i8 } addrspace(4)*
; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, { i8, i8 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1300,7 +1300,7 @@ entry:
define amdgpu_kernel void @struct_i8_i16_arg({i8, i16} %in) #0 {
; HSA-LABEL: @struct_i8_i16_arg(
; HSA-NEXT: entry:
-; HSA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i16 } addrspace(4)*
; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, { i8, i16 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1312,7 +1312,7 @@ define amdgpu_kernel void @struct_i8_i16_arg({i8, i16} %in) #0 {
;
; MESA-LABEL: @struct_i8_i16_arg(
; MESA-NEXT: entry:
-; MESA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i16 } addrspace(4)*
; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, { i8, i16 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1333,7 +1333,7 @@ entry:
define amdgpu_kernel void @array_2xi8_arg([2 x i8] %in) #0 {
; HSA-LABEL: @array_2xi8_arg(
; HSA-NEXT: entry:
-; HSA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i8] addrspace(4)*
; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], [2 x i8] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1345,7 +1345,7 @@ define amdgpu_kernel void @array_2xi8_arg([2 x i8] %in) #0 {
;
; MESA-LABEL: @array_2xi8_arg(
; MESA-NEXT: entry:
-; MESA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i8] addrspace(4)*
; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], [2 x i8] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1366,7 +1366,7 @@ entry:
define amdgpu_kernel void @array_2xi1_arg([2 x i1] %in) #0 {
; HSA-LABEL: @array_2xi1_arg(
; HSA-NEXT: entry:
-; HSA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i1] addrspace(4)*
; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], [2 x i1] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1378,7 +1378,7 @@ define amdgpu_kernel void @array_2xi1_arg([2 x i1] %in) #0 {
;
; MESA-LABEL: @array_2xi1_arg(
; MESA-NEXT: entry:
-; MESA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i1] addrspace(4)*
; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], [2 x i1] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1398,10 +1398,11 @@ entry:
define amdgpu_kernel void @only_empty_struct({} %empty) #0 {
; HSA-LABEL: @only_empty_struct(
+; HSA-NEXT: [[ONLY_EMPTY_STRUCT_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(56) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: ret void
;
; MESA-LABEL: @only_empty_struct(
-; MESA-NEXT: [[ONLY_EMPTY_STRUCT_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(36) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[ONLY_EMPTY_STRUCT_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(56) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: ret void
;
ret void
@@ -1409,7 +1410,7 @@ define amdgpu_kernel void @only_empty_struct({} %empty) #0 {
define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 {
; HSA-LABEL: @empty_struct_with_other(
-; HSA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1417,7 +1418,7 @@ define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 {
; HSA-NEXT: ret void
;
; MESA-LABEL: @empty_struct_with_other(
-; MESA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1432,7 +1433,7 @@ define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 {
define amdgpu_kernel void @static_alloca_kern_i32(i32 %arg0) {
; HSA-LABEL: @static_alloca_kern_i32(
; HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
-; HSA-NEXT: [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1441,7 +1442,7 @@ define amdgpu_kernel void @static_alloca_kern_i32(i32 %arg0) {
;
; MESA-LABEL: @static_alloca_kern_i32(
; MESA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
-; MESA-NEXT: [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1458,7 +1459,7 @@ define amdgpu_kernel void @static_alloca_kern_i32(i32 %arg0) {
define amdgpu_kernel void @dyn_alloca_kernarg_i32(i32 %n) {
; HSA-LABEL: @dyn_alloca_kernarg_i32(
; HSA-NEXT: [[ALLOCA0:%.*]] = alloca i32, align 4, addrspace(5)
-; HSA-NEXT: [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[N_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[N_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[N_KERNARG_OFFSET]] to i32 addrspace(4)*
; HSA-NEXT: [[N_LOAD:%.*]] = load i32, i32 addrspace(4)* [[N_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1469,7 +1470,7 @@ define amdgpu_kernel void @dyn_alloca_kernarg_i32(i32 %n) {
;
; MESA-LABEL: @dyn_alloca_kernarg_i32(
; MESA-NEXT: [[ALLOCA0:%.*]] = alloca i32, align 4, addrspace(5)
-; MESA-NEXT: [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[N_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[N_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[N_KERNARG_OFFSET]] to i32 addrspace(4)*
; MESA-NEXT: [[N_LOAD:%.*]] = load i32, i32 addrspace(4)* [[N_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1488,7 +1489,7 @@ define amdgpu_kernel void @dyn_alloca_kernarg_i32(i32 %n) {
; Byref pointers should only be treated as offsets from kernarg
define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %out, i8 addrspace(4)* byref(i8) %in.byref) {
; HSA-LABEL: @byref_constant_i8_arg(
-; HSA-NEXT: [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(12) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1499,7 +1500,7 @@ define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %ou
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_constant_i8_arg(
-; MESA-NEXT: [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1517,7 +1518,7 @@ define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %ou
define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %out, i16 addrspace(4)* byref(i16) %in.byref) {
; HSA-LABEL: @byref_constant_i16_arg(
-; HSA-NEXT: [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(12) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1529,7 +1530,7 @@ define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %o
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_constant_i16_arg(
-; MESA-NEXT: [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1548,7 +1549,7 @@ define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %o
define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) %in.byref, i32 %after.offset) {
; HSA-LABEL: @byref_constant_i32_arg(
-; HSA-NEXT: [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(16) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1563,7 +1564,7 @@ define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %o
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_constant_i32_arg(
-; MESA-NEXT: [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(52) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1585,7 +1586,7 @@ define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %o
define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> addrspace(4)* byref(<4 x i32>) %in.byref, i32 %after.offset) {
; HSA-LABEL: @byref_constant_v4i32_arg(
-; HSA-NEXT: [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(36) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(96) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to <4 x i32> addrspace(1)* addrspace(4)*
; HSA-NEXT: [[OUT_LOAD:%.*]] = load <4 x i32> addrspace(1)*, <4 x i32> addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1601,7 +1602,7 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* noca
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_constant_v4i32_arg(
-; MESA-NEXT: [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(92) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to <4 x i32> addrspace(1)* addrspace(4)*
; MESA-NEXT: [[OUT_LOAD:%.*]] = load <4 x i32> addrspace(1)*, <4 x i32> addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1625,7 +1626,7 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* noca
define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align(256) %in.byref, i32 %after.offset) {
; HSA-LABEL: @byref_align_constant_i32_arg(
-; HSA-NEXT: [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 256 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 256 dereferenceable(320) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1640,7 +1641,7 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapt
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_align_constant_i32_arg(
-; MESA-NEXT: [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 256 dereferenceable(300) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 256 dereferenceable(320) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1662,7 +1663,7 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapt
define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace(1)* nocapture %out, i8, <16 x i32> addrspace(4)* byref(<16 x i32>) %in.byref, i32 %after.offset) {
; HSA-LABEL: @byref_natural_align_constant_v16i32_arg(
-; HSA-NEXT: [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(132) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(192) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1678,7 +1679,7 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_natural_align_constant_v16i32_arg(
-; MESA-NEXT: [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(168) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(188) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1703,7 +1704,7 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace
; Also accept byref kernel arguments with other global address spaces.
define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* byref(i32) %in.byref) {
; HSA-LABEL: @byref_global_i32_arg(
-; HSA-NEXT: [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(12) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1714,7 +1715,7 @@ define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_global_i32_arg(
-; MESA-NEXT: [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1731,7 +1732,7 @@ define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out
define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out, i32* byref(i32) %in.byref) {
; HSA-LABEL: @byref_flat_i32_arg(
-; HSA-NEXT: [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(12) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1742,7 +1743,7 @@ define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out,
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_flat_i32_arg(
-; MESA-NEXT: [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1759,7 +1760,7 @@ define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out,
define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(6)* byref(i32) %in.byref) {
; HSA-LABEL: @byref_constant_32bit_i32_arg(
-; HSA-NEXT: [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(12) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1770,7 +1771,7 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapt
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_constant_32bit_i32_arg(
-; MESA-NEXT: [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1787,7 +1788,7 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapt
define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(999)* byref(i32) %in.byref) {
; HSA-LABEL: @byref_unknown_as_i32_arg(
-; HSA-NEXT: [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(12) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1798,7 +1799,7 @@ define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_unknown_as_i32_arg(
-; MESA-NEXT: [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1816,7 +1817,7 @@ define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture
; Invalid, but should not crash.
define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(3)* byref(i32) %in.byref) {
; HSA-LABEL: @byref_local_i32_arg(
-; HSA-NEXT: [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(12) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1827,7 +1828,7 @@ define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out,
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_local_i32_arg(
-; MESA-NEXT: [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1844,7 +1845,7 @@ define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out,
define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) %in0.byref, i32 addrspace(4)* byref(i32) %in1.byref, i32 %after.offset) {
; HSA-LABEL: @multi_byref_constant_i32_arg(
-; HSA-NEXT: [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(20) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(80) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; HSA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1863,7 +1864,7 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapt
; HSA-NEXT: ret void
;
; MESA-LABEL: @multi_byref_constant_i32_arg(
-; MESA-NEXT: [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(56) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(76) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
; MESA-NEXT: [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1891,7 +1892,7 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapt
define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byref(i32) %in.byref) {
; HSA-LABEL: @byref_constant_i32_arg_offset0(
-; HSA-NEXT: [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(4)* [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to i32 addrspace(4)*
; HSA-NEXT: [[IN:%.*]] = load i32, i32 addrspace(4)* [[TMP1]], align 4
@@ -1899,7 +1900,7 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byre
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_constant_i32_arg_offset0(
-; MESA-NEXT: [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(4)* [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to i32 addrspace(4)*
; MESA-NEXT: [[IN:%.*]] = load i32, i32 addrspace(4)* [[TMP1]], align 4
More information about the llvm-commits
mailing list