[clang] ca856ff - Revert "enable code-object-version=5"

Ron Lieberman via cfe-commits cfe-commits at lists.llvm.org
Tue Nov 29 13:21:51 PST 2022


Author: Ron Lieberman
Date: 2022-11-29T15:21:09-06:00
New Revision: ca856fff1c0fcb5811ae33e3ce9cd7c2f7702a30

URL: https://github.com/llvm/llvm-project/commit/ca856fff1c0fcb5811ae33e3ce9cd7c2f7702a30
DIFF: https://github.com/llvm/llvm-project/commit/ca856fff1c0fcb5811ae33e3ce9cd7c2f7702a30.diff

LOG: Revert "enable code-object-version=5"

very sorry wrong repo.

This reverts commit d882ba7aeac4b496dccd1b10cb58bd691786b691.

Added: 
    

Modified: 
    clang/include/clang/Driver/Options.td
    clang/lib/Driver/ToolChains/CommonArgs.cpp
    clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
    clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
    clang/test/CodeGenOpenCL/builtins-amdgcn.cl
    clang/test/Driver/hip-device-libs.hip
    lld/test/ELF/emulation-amdgpu.s
    lld/test/ELF/lto/amdgcn-oses.ll
    llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
    llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
    llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
    llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
    llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
    llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
    llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
    llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
    llvm/test/CodeGen/AMDGPU/cc-update.ll
    llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
    llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
    llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll
    llvm/test/CodeGen/AMDGPU/ds_read2.ll
    llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll
    llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll
    llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
    llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
    llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll
    llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
    llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll
    llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
    llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
    llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll
    llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
    llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
    llvm/test/CodeGen/AMDGPU/recursion.ll
    llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
    llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
    llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll
    llvm/test/CodeGen/AMDGPU/trap.ll
    llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
    llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
    mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 9a2ca1711798a..8da5e25bd38d0 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3690,12 +3690,12 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
   NegFlag<SetFalse, [CC1Option]>>, Group<m_Group>;
 
 def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
-  HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">,
+  HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
   Flags<[CC1Option]>,
   Values<"none,2,3,4,5">,
   NormalizedValuesScope<"TargetOptions">,
   NormalizedValues<["COV_None", "COV_2", "COV_3", "COV_4", "COV_5"]>,
-  MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_5">;
+  MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_4">;
 
 defm code_object_v3_legacy : SimpleMFlag<"code-object-v3",
   "Legacy option to specify code object ABI V3",

diff  --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index bea5b15fa84ea..30fca3a4c0590 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2157,7 +2157,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D,
 
 unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D,
                                            const llvm::opt::ArgList &Args) {
-  unsigned CodeObjVer = 5; // default
+  unsigned CodeObjVer = 4; // default
   if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) {
     if (CodeObjArg->getOption().getID() ==
         options::OPT_mno_code_object_v3_legacy) {

diff  --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
index 62ccc2bd4d05d..16505b34c4a6e 100644
--- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
@@ -1,7 +1,7 @@
 // Create module flag for code object version.
 
 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
-// RUN:   -o - %s | FileCheck %s -check-prefix=V5
+// RUN:   -o - %s | FileCheck %s -check-prefix=V4
 
 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
 // RUN:   -mcode-object-version=2 -o - %s | FileCheck -check-prefix=V2 %s

diff  --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
index 847be23ba8e48..4d788e6807ab2 100644
--- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa \
-// RUN:     -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \
+// RUN:     -fcuda-is-device -emit-llvm -o - -x hip %s \
 // RUN:     | FileCheck -check-prefix=PRECOV5 %s
 
 

diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index a678a27734dc1..600194e5e6c13 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -583,13 +583,13 @@ void test_get_local_id(int d, global int *out)
 }
 
 // CHECK-LABEL: @test_get_workgroup_size(
-// CHECK: call align 8 dereferenceable(256) i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
-// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i64 12
+// CHECK: call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i64 4
 // CHECK: load i16, i16 addrspace(4)* %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load
-// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i64 14
+// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i64 6
 // CHECK: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
-// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i64 16
-// CHECK: load i16, i16 addrspace(4)* %{{.*}}, align 8, !range [[$WS_RANGE:![0-9]*]], !invariant.load
+// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i64 8
+// CHECK: load i16, i16 addrspace(4)* %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load
 void test_get_workgroup_size(int d, global int *out)
 {
 	switch (d) {

diff  --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip
index 8535705a09694..d276f6ea47244 100644
--- a/clang/test/Driver/hip-device-libs.hip
+++ b/clang/test/Driver/hip-device-libs.hip
@@ -139,13 +139,13 @@
 // Test default code object version.
 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI4
 
 // Test default code object version with old device library without abi_version_400.bc
 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
 // RUN:   --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver   \
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5
+// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4
 
 // Test -mcode-object-version=3
 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \

diff  --git a/lld/test/ELF/emulation-amdgpu.s b/lld/test/ELF/emulation-amdgpu.s
index 329fb1c69b166..707f0aeb909ef 100644
--- a/lld/test/ELF/emulation-amdgpu.s
+++ b/lld/test/ELF/emulation-amdgpu.s
@@ -13,7 +13,7 @@
 # CHECK-NEXT:     DataEncoding: LittleEndian (0x1)
 # CHECK-NEXT:     FileVersion: 1
 # CHECK-NEXT:     OS/ABI: AMDGPU_HSA (0x40)
-# CHECK-NEXT:     ABIVersion: 3
+# CHECK-NEXT:     ABIVersion: 2
 # CHECK-NEXT:     Unused: (00 00 00 00 00 00 00)
 # CHECK-NEXT:   }
 # CHECK-NEXT:   Type: Executable (0x2)

diff  --git a/lld/test/ELF/lto/amdgcn-oses.ll b/lld/test/ELF/lto/amdgcn-oses.ll
index a70b678ac2514..a2f25cdd57d87 100644
--- a/lld/test/ELF/lto/amdgcn-oses.ll
+++ b/lld/test/ELF/lto/amdgcn-oses.ll
@@ -15,7 +15,7 @@
 ; RUN: llvm-readobj --file-headers %t/mesa3d.so | FileCheck %s --check-prefixes=GCN,NON-AMDHSA,MESA3D
 
 ; AMDHSA: OS/ABI: AMDGPU_HSA (0x40)
-; AMDHSA: ABIVersion: 3
+; AMDHSA: ABIVersion: 2
 
 ; AMDPAL: OS/ABI: AMDGPU_PAL (0x41)
 ; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42)

diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 426df3faa2c34..ce2f7f0642be1 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -32,7 +32,7 @@
 static llvm::cl::opt<unsigned>
     AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden,
                             llvm::cl::desc("AMDHSA Code Object Version"),
-                            llvm::cl::init(5));
+                            llvm::cl::init(4));
 
 namespace {
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll
index b751c14ca9a26..bb6f809df53f4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll
@@ -7,42 +7,43 @@ declare void @callee()
 define amdgpu_kernel void @call_debug_loc() {
   ; CHECK-LABEL: name: call_debug_loc
   ; CHECK: bb.1.entry:
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2, debug-location !6
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1, debug-location !6
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0, debug-location !6
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14, debug-location !6
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13, debug-location !6
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12, debug-location !6
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9, debug-location !6
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5, debug-location !6
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16, debug-location !6
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15, debug-location !6
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14, debug-location !6
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11, debug-location !6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7, debug-location !6
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5, debug-location !6
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc, debug-location !6
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[COPY7]], debug-location !6
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF debug-location !6
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:sreg_64 = COPY [[COPY6]], debug-location !6
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !6
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !6
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !6
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !6
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:sreg_64 = COPY [[COPY8]], debug-location !6
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:sreg_64 = COPY [[COPY7]], debug-location !6
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:sreg_64 = COPY [[COPY6]], debug-location !6
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !6
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !6
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !6
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !6
   ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 10
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-  ; CHECK-NEXT:   [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY14]], [[COPY1]], implicit $exec, debug-location !6
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+  ; CHECK-NEXT:   [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[COPY1]], implicit $exec, debug-location !6
   ; CHECK-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 20
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
-  ; CHECK-NEXT:   [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY15]], [[COPY]], implicit $exec, debug-location !6
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; CHECK-NEXT:   [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY17]], [[COPY]], implicit $exec, debug-location !6
   ; CHECK-NEXT:   [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[COPY2]], [[V_LSHLREV_B32_e64_]], [[V_LSHLREV_B32_e64_1]], implicit $exec, debug-location !6
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !6
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]], debug-location !6
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]], debug-location !6
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]], debug-location !6
-  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY8]], debug-location !6
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY10]], debug-location !6
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY11]], debug-location !6
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY12]], debug-location !6
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY13]], debug-location !6
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]], debug-location !6
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !6
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]], debug-location !6
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]], debug-location !6
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]], debug-location !6
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY9]], debug-location !6
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]], debug-location !6
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY13]], debug-location !6
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY14]], debug-location !6
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY15]], debug-location !6
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]], debug-location !6
   ; CHECK-NEXT:   $vgpr31 = COPY [[V_OR3_B32_e64_]], debug-location !6
   ; CHECK-NEXT:   [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def $scc, debug-location !6
   ; CHECK-NEXT:   [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0, debug-location !6 :: (dereferenceable invariant load (p0) from got, addrspace 4)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
index 24080b11a18bb..55318eae4d72f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
@@ -32,13 +32,13 @@ define void @call_result_align_1() {
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr
   ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -81,13 +81,13 @@ define void @call_result_align_8() {
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr
   ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -131,13 +131,13 @@ define void @declaration_result_align_8() {
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr_align8
   ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -181,11 +181,11 @@ define ptr addrspace(1) @tail_call_assert_align() {
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @returns_ptr_align8
   ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
index 050699699d9aa..545580f33c74f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
@@ -9,36 +9,37 @@ declare hidden void @extern()
 define amdgpu_kernel void @kernel_call_no_workitem_ids() {
   ; CHECK-LABEL: name: kernel_call_no_workitem_ids
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]](p4)
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY7]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY6]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY7]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY8]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY8]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY9]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY10]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY11]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY10]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY11]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY12]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY13]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; CHECK-NEXT:   S_ENDPGM 0
@@ -49,39 +50,40 @@ define amdgpu_kernel void @kernel_call_no_workitem_ids() {
 define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
   ; CHECK-LABEL: name: kernel_call_no_workgroup_ids
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]](p4)
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY7]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY11]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY6]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY7]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY8]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY8]](s64)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY10]](s64)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -93,12 +95,12 @@ define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
 define amdgpu_kernel void @kernel_call_no_other_sgprs() {
   ; CHECK-LABEL: name: kernel_call_no_other_sgprs
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr6_sgpr7
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr8_sgpr9
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(p4) = COPY [[COPY3]](p4)
@@ -137,12 +139,12 @@ define void @func_call_no_workitem_ids() {
   ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
   ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
   ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
   ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
@@ -175,12 +177,12 @@ define void @func_call_no_workgroup_ids() {
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
   ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]](p4)
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]]
   ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY3]]
   ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY2]]
   ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]]

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
index c1508d2199ce1..cfc2fc0dc439a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
@@ -11,102 +11,104 @@ declare hidden void @external_void_func_v32i32(<32 x i32>) #0
 define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
   ; GFX900-LABEL: name: test_call_external_void_func_i32
   ; GFX900: bb.1 (%ir-block.1):
-  ; GFX900-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GFX900-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GFX900-NEXT: {{  $}}
   ; GFX900-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GFX900-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GFX900-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
   ; GFX900-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; GFX900-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GFX900-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX900-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
-  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
-  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX900-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX900-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GFX900-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GFX900-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GFX900-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GFX900-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GFX900-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GFX900-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GFX900-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GFX900-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX900-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GFX900-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; GFX900-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; GFX900-NEXT:   $vgpr0 = COPY [[C]](s32)
-  ; GFX900-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; GFX900-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GFX900-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GFX900-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GFX900-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; GFX900-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GFX900-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; GFX900-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GFX900-NEXT:   S_ENDPGM 0
   ; GFX908-LABEL: name: test_call_external_void_func_i32
   ; GFX908: bb.1 (%ir-block.1):
-  ; GFX908-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GFX908-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GFX908-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
   ; GFX908-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; GFX908-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GFX908-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
-  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
-  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GFX908-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GFX908-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GFX908-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GFX908-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GFX908-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GFX908-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GFX908-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GFX908-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX908-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GFX908-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; GFX908-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; GFX908-NEXT:   $vgpr0 = COPY [[C]](s32)
-  ; GFX908-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; GFX908-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GFX908-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GFX908-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GFX908-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; GFX908-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GFX908-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; GFX908-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -127,13 +129,13 @@ define void @test_func_call_external_void_func_i32() #0 {
   ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; GFX900-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99
   ; GFX900-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GFX900-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
   ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -167,13 +169,13 @@ define void @test_func_call_external_void_func_i32() #0 {
   ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; GFX908-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99
   ; GFX908-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GFX908-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
   ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -205,40 +207,41 @@ define void @test_func_call_external_void_func_i32() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
   ; GFX900-LABEL: name: test_call_external_void_func_v32i32
   ; GFX900: bb.1 (%ir-block.1):
-  ; GFX900-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GFX900-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GFX900-NEXT: {{  $}}
   ; GFX900-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GFX900-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GFX900-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GFX900-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
   ; GFX900-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; GFX900-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GFX900-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
-  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX900-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
-  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
-  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX900-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX900-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GFX900-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GFX900-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GFX900-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GFX900-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GFX900-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GFX900-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GFX900-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GFX900-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX900-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GFX900-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; GFX900-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; GFX900-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
   ; GFX900-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
@@ -276,56 +279,57 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
   ; GFX900-NEXT:   $vgpr28 = COPY [[UV28]](s32)
   ; GFX900-NEXT:   $vgpr29 = COPY [[UV29]](s32)
   ; GFX900-NEXT:   $vgpr30 = COPY [[UV30]](s32)
-  ; GFX900-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; GFX900-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GFX900-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GFX900-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GFX900-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; GFX900-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GFX900-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; GFX900-NEXT:   ADJCALLSTACKDOWN 0, 4, implicit-def $scc
   ; GFX900-NEXT:   S_ENDPGM 0
   ; GFX908-LABEL: name: test_call_external_void_func_v32i32
   ; GFX908: bb.1 (%ir-block.1):
-  ; GFX908-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GFX908-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GFX908-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GFX908-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
   ; GFX908-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; GFX908-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GFX908-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
-  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
-  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
-  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GFX908-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GFX908-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GFX908-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GFX908-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GFX908-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GFX908-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GFX908-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GFX908-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX908-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GFX908-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; GFX908-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; GFX908-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
   ; GFX908-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
@@ -363,16 +367,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
   ; GFX908-NEXT:   $vgpr28 = COPY [[UV28]](s32)
   ; GFX908-NEXT:   $vgpr29 = COPY [[UV29]](s32)
   ; GFX908-NEXT:   $vgpr30 = COPY [[UV30]](s32)
-  ; GFX908-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; GFX908-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GFX908-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GFX908-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GFX908-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; GFX908-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GFX908-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; GFX908-NEXT:   ADJCALLSTACKDOWN 0, 4, implicit-def $scc
@@ -393,7 +397,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
   ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GFX900-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
@@ -451,7 +455,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
   ; GFX900-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GFX900-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
   ; GFX900-NEXT:   [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GFX900-NEXT:   [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; GFX900-NEXT:   [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GFX900-NEXT:   [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; GFX900-NEXT:   [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; GFX900-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -520,7 +524,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
   ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GFX908-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
@@ -578,7 +582,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
   ; GFX908-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GFX908-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
   ; GFX908-NEXT:   [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GFX908-NEXT:   [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; GFX908-NEXT:   [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GFX908-NEXT:   [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; GFX908-NEXT:   [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; GFX908-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -643,81 +647,83 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
 define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0 {
   ; GFX900-LABEL: name: test_only_workitem_id_x
   ; GFX900: bb.1 (%ir-block.0):
-  ; GFX900-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GFX900-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GFX900-NEXT: {{  $}}
   ; GFX900-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX900-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GFX900-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
   ; GFX900-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GFX900-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GFX900-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
+  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
   ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY8]], [[C1]](s64)
-  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GFX900-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; GFX900-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GFX900-NEXT:   $vgpr0 = COPY [[C]](s32)
-  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
-  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY7]](p4)
-  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
+  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[COPY9]](p4)
   ; GFX900-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY9]](s64)
-  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY10]](s32)
-  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY11]](s32)
-  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY12]](s32)
-  ; GFX900-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
-  ; GFX900-NEXT:   $vgpr31 = COPY [[COPY13]](s32)
+  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GFX900-NEXT:   $sgpr15 = COPY [[DEF]](s32)
+  ; GFX900-NEXT:   $vgpr31 = COPY [[COPY15]](s32)
   ; GFX900-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; GFX900-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GFX900-NEXT:   S_ENDPGM 0
   ; GFX908-LABEL: name: test_only_workitem_id_x
   ; GFX908: bb.1 (%ir-block.0):
-  ; GFX908-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GFX908-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GFX908-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
   ; GFX908-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GFX908-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
+  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
   ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY8]], [[C1]](s64)
-  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GFX908-NEXT:   $vgpr0 = COPY [[C]](s32)
-  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
-  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY7]](p4)
-  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
+  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[COPY9]](p4)
   ; GFX908-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY9]](s64)
-  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY10]](s32)
-  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY11]](s32)
-  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY12]](s32)
-  ; GFX908-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
-  ; GFX908-NEXT:   $vgpr31 = COPY [[COPY13]](s32)
+  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GFX908-NEXT:   $sgpr15 = COPY [[DEF]](s32)
+  ; GFX908-NEXT:   $vgpr31 = COPY [[COPY15]](s32)
   ; GFX908-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; GFX908-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GFX908-NEXT:   S_ENDPGM 0
@@ -728,88 +734,90 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0
 define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1 {
   ; GFX900-LABEL: name: test_only_workitem_id_y
   ; GFX900: bb.1 (%ir-block.0):
-  ; GFX900-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GFX900-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GFX900-NEXT: {{  $}}
   ; GFX900-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GFX900-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GFX900-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
   ; GFX900-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GFX900-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GFX900-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
+  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
   ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY8]], [[C1]](s64)
-  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GFX900-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; GFX900-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
   ; GFX900-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GFX900-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C3]](s32)
+  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32)
   ; GFX900-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]]
   ; GFX900-NEXT:   $vgpr0 = COPY [[C]](s32)
-  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
-  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY7]](p4)
-  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
+  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[COPY9]](p4)
   ; GFX900-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY9]](s64)
-  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY10]](s32)
-  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY11]](s32)
-  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY12]](s32)
-  ; GFX900-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GFX900-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; GFX900-NEXT:   $vgpr31 = COPY [[OR]](s32)
   ; GFX900-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; GFX900-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GFX900-NEXT:   S_ENDPGM 0
   ; GFX908-LABEL: name: test_only_workitem_id_y
   ; GFX908: bb.1 (%ir-block.0):
-  ; GFX908-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GFX908-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GFX908-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
   ; GFX908-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GFX908-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
+  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
   ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY8]], [[C1]](s64)
-  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
   ; GFX908-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GFX908-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C3]](s32)
+  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32)
   ; GFX908-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]]
   ; GFX908-NEXT:   $vgpr0 = COPY [[C]](s32)
-  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
-  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY7]](p4)
-  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
+  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[COPY9]](p4)
   ; GFX908-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY9]](s64)
-  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY10]](s32)
-  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY11]](s32)
-  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY12]](s32)
-  ; GFX908-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GFX908-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; GFX908-NEXT:   $vgpr31 = COPY [[OR]](s32)
   ; GFX908-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; GFX908-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -821,88 +829,90 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1
 define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2 {
   ; GFX900-LABEL: name: test_only_workitem_id_z
   ; GFX900: bb.1 (%ir-block.0):
-  ; GFX900-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GFX900-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GFX900-NEXT: {{  $}}
   ; GFX900-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GFX900-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GFX900-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
   ; GFX900-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GFX900-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GFX900-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
+  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
   ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY8]], [[C1]](s64)
-  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GFX900-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; GFX900-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
   ; GFX900-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GFX900-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C3]](s32)
+  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32)
   ; GFX900-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]]
   ; GFX900-NEXT:   $vgpr0 = COPY [[C]](s32)
-  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
-  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY7]](p4)
-  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
+  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[COPY9]](p4)
   ; GFX900-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY9]](s64)
-  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY10]](s32)
-  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY11]](s32)
-  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY12]](s32)
-  ; GFX900-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GFX900-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; GFX900-NEXT:   $vgpr31 = COPY [[OR]](s32)
   ; GFX900-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; GFX900-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GFX900-NEXT:   S_ENDPGM 0
   ; GFX908-LABEL: name: test_only_workitem_id_z
   ; GFX908: bb.1 (%ir-block.0):
-  ; GFX908-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GFX908-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GFX908-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
   ; GFX908-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GFX908-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
+  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
   ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY8]], [[C1]](s64)
-  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
+  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
   ; GFX908-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GFX908-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C3]](s32)
+  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C3]](s32)
   ; GFX908-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]]
   ; GFX908-NEXT:   $vgpr0 = COPY [[C]](s32)
-  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
-  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY7]](p4)
-  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
+  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[COPY9]](p4)
   ; GFX908-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY9]](s64)
-  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY10]](s32)
-  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY11]](s32)
-  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY12]](s32)
-  ; GFX908-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GFX908-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; GFX908-NEXT:   $vgpr31 = COPY [[OR]](s32)
   ; GFX908-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; GFX908-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -914,90 +924,92 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2
 define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !3 {
   ; GFX900-LABEL: name: test_only_workitem_id_xy
   ; GFX900: bb.1 (%ir-block.0):
-  ; GFX900-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GFX900-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GFX900-NEXT: {{  $}}
   ; GFX900-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GFX900-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GFX900-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
   ; GFX900-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GFX900-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GFX900-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
   ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C1]](s64)
-  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY5]]
-  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GFX900-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
+  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GFX900-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX900-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GFX900-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C2]](s32)
-  ; GFX900-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[SHL]]
+  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GFX900-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]]
   ; GFX900-NEXT:   $vgpr0 = COPY [[C]](s32)
-  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
-  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY8]](p4)
-  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; GFX900-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
   ; GFX900-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY10]](s64)
-  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY11]](s32)
-  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY12]](s32)
-  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY13]](s32)
-  ; GFX900-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; GFX900-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; GFX900-NEXT:   $vgpr31 = COPY [[OR]](s32)
   ; GFX900-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; GFX900-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GFX900-NEXT:   S_ENDPGM 0
   ; GFX908-LABEL: name: test_only_workitem_id_xy
   ; GFX908: bb.1 (%ir-block.0):
-  ; GFX908-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GFX908-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GFX908-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
   ; GFX908-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GFX908-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
   ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C1]](s64)
-  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY5]]
-  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
+  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX908-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GFX908-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C2]](s32)
-  ; GFX908-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[SHL]]
+  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GFX908-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]]
   ; GFX908-NEXT:   $vgpr0 = COPY [[C]](s32)
-  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
-  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY8]](p4)
-  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; GFX908-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
   ; GFX908-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY10]](s64)
-  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY11]](s32)
-  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY12]](s32)
-  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY13]](s32)
-  ; GFX908-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; GFX908-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; GFX908-NEXT:   $vgpr31 = COPY [[OR]](s32)
   ; GFX908-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; GFX908-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1009,98 +1021,100 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !
 define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !4 {
   ; GFX900-LABEL: name: test_only_workitem_id_yz
   ; GFX900: bb.1 (%ir-block.0):
-  ; GFX900-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GFX900-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GFX900-NEXT: {{  $}}
   ; GFX900-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GFX900-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GFX900-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
   ; GFX900-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GFX900-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GFX900-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
   ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C1]](s64)
-  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY5]]
-  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GFX900-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
+  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GFX900-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
   ; GFX900-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GFX900-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY14]], [[C3]](s32)
+  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32)
   ; GFX900-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]]
-  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX900-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GFX900-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX900-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C4]](s32)
+  ; GFX900-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32)
   ; GFX900-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; GFX900-NEXT:   $vgpr0 = COPY [[C]](s32)
-  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
-  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY8]](p4)
-  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; GFX900-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
   ; GFX900-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY10]](s64)
-  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY11]](s32)
-  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY12]](s32)
-  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY13]](s32)
-  ; GFX900-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; GFX900-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; GFX900-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GFX900-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; GFX900-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GFX900-NEXT:   S_ENDPGM 0
   ; GFX908-LABEL: name: test_only_workitem_id_yz
   ; GFX908: bb.1 (%ir-block.0):
-  ; GFX908-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GFX908-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GFX908-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
   ; GFX908-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GFX908-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
   ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C1]](s64)
-  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY5]]
-  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
+  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
   ; GFX908-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GFX908-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY14]], [[C3]](s32)
+  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32)
   ; GFX908-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]]
-  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX908-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GFX908-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX908-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C4]](s32)
+  ; GFX908-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32)
   ; GFX908-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; GFX908-NEXT:   $vgpr0 = COPY [[C]](s32)
-  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
-  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY8]](p4)
-  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; GFX908-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
   ; GFX908-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY10]](s64)
-  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY11]](s32)
-  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY12]](s32)
-  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY13]](s32)
-  ; GFX908-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; GFX908-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; GFX908-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GFX908-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; GFX908-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1112,90 +1126,92 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !
 define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !5 {
   ; GFX900-LABEL: name: test_only_workitem_id_xz
   ; GFX900: bb.1 (%ir-block.0):
-  ; GFX900-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GFX900-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GFX900-NEXT: {{  $}}
   ; GFX900-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GFX900-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GFX900-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
   ; GFX900-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GFX900-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GFX900-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
   ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C1]](s64)
-  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY5]]
-  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GFX900-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
+  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GFX900-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX900-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GFX900-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C2]](s32)
-  ; GFX900-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[SHL]]
+  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GFX900-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]]
   ; GFX900-NEXT:   $vgpr0 = COPY [[C]](s32)
-  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
-  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY8]](p4)
-  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; GFX900-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
   ; GFX900-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY10]](s64)
-  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY11]](s32)
-  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY12]](s32)
-  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY13]](s32)
-  ; GFX900-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; GFX900-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; GFX900-NEXT:   $vgpr31 = COPY [[OR]](s32)
   ; GFX900-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; GFX900-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GFX900-NEXT:   S_ENDPGM 0
   ; GFX908-LABEL: name: test_only_workitem_id_xz
   ; GFX908: bb.1 (%ir-block.0):
-  ; GFX908-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GFX908-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GFX908-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
   ; GFX908-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GFX908-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
   ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C1]](s64)
-  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY5]]
-  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY11]], [[C1]](s64)
+  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX908-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GFX908-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C2]](s32)
-  ; GFX908-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[SHL]]
+  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GFX908-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]]
   ; GFX908-NEXT:   $vgpr0 = COPY [[C]](s32)
-  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
-  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY8]](p4)
-  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; GFX908-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
   ; GFX908-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY10]](s64)
-  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY11]](s32)
-  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY12]](s32)
-  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY13]](s32)
-  ; GFX908-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; GFX908-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; GFX908-NEXT:   $vgpr31 = COPY [[OR]](s32)
   ; GFX908-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; GFX908-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 23acb2d4a07a3..572b2e5a8b245 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -70,57 +70,58 @@ declare hidden i32 @external_gfx_i32_func_i32(i32) #0
 define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 {
   ; GCN-LABEL: name: test_call_external_i32_func_i32_imm
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
   ; GCN-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; GCN-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.out.kernarg.offset1, align 16, addrspace 4)
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_func_i32
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; GCN-NEXT:   $vgpr0 = COPY [[C]](s32)
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN-NEXT:   G_STORE [[COPY19]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.load, addrspace 1)
+  ; GCN-NEXT:   G_STORE [[COPY21]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.load, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
   %val = call i32 @external_i32_func_i32(i32 42)
   store volatile i32 %val, ptr addrspace(1) %out
@@ -154,53 +155,54 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(ptr addrspace(1)
 define amdgpu_kernel void @test_call_external_i1_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i1_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i1_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY19]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `ptr addrspace(1) undef`, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -231,53 +233,54 @@ define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i1_zeroext_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i1_zeroext_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY19]], 1
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 1
   ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1)
@@ -292,53 +295,54 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i1_signext_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i1_signext_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY19]], 1
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 1
   ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1)
@@ -353,53 +357,54 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i8_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i8_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i8_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
   ; GCN-NEXT:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1)
@@ -432,53 +437,54 @@ define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i8_zeroext_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i8_zeroext_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY19]], 8
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 8
   ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8)
@@ -493,53 +499,54 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i8_signext_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i8_signext_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY19]], 8
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 8
   ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8)
@@ -554,53 +561,54 @@ define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i16_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i16_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i16_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `ptr addrspace(1) undef`, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -612,53 +620,54 @@ define amdgpu_kernel void @test_call_external_i16_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i16_zeroext_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i16_zeroext_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY19]], 16
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 16
   ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
@@ -673,53 +682,54 @@ define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i16_signext_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i16_signext_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY19]], 16
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 16
   ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
@@ -734,54 +744,55 @@ define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN-NEXT:   G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+  ; GCN-NEXT:   G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
   %val = call i32 @external_i32_func_void()
   store volatile i32 %val, ptr addrspace(1) undef
@@ -809,54 +820,55 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i48_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i48_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i48_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
   ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (volatile store (s48) into `ptr addrspace(1) undef`, align 8, addrspace 1)
@@ -869,54 +881,55 @@ define amdgpu_kernel void @test_call_external_i48_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i48_zeroext_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i48_zeroext_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
   ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
@@ -931,54 +944,55 @@ define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i48_signext_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i48_signext_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
   ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48)
@@ -993,54 +1007,55 @@ define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i64_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i64_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i64_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -1052,54 +1067,55 @@ define amdgpu_kernel void @test_call_external_i64_func_void() #0 {
 define amdgpu_kernel void @test_call_external_p1_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_p1_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_p1_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[MV]](p1), [[DEF]](p1) :: (volatile store (p1) into `ptr addrspace(1) undef`, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -1111,57 +1127,58 @@ define amdgpu_kernel void @test_call_external_p1_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v2p1_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2p1_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
-  ; GCN-NEXT:   [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
+  ; GCN-NEXT:   [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32)
   ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (volatile store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1)
@@ -1174,54 +1191,55 @@ define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 {
 define amdgpu_kernel void @test_call_external_p3_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_p3_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_p3_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN-NEXT:   G_STORE [[COPY19]](p3), [[DEF]](p3) :: (volatile store (p3) into `ptr addrspace(3) undef`, addrspace 3)
+  ; GCN-NEXT:   G_STORE [[COPY21]](p3), [[DEF]](p3) :: (volatile store (p3) into `ptr addrspace(3) undef`, addrspace 3)
   ; GCN-NEXT:   S_ENDPGM 0
   %val = call ptr addrspace(3) @external_p3_func_void()
   store volatile ptr addrspace(3) %val, ptr addrspace(3) undef
@@ -1231,54 +1249,55 @@ define amdgpu_kernel void @test_call_external_p3_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v2p3_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2p3_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(p3) = COPY $vgpr1
-  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY19]](p3), [[COPY20]](p3)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(p3) = COPY $vgpr1
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY21]](p3), [[COPY22]](p3)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p3) :: (volatile store (<2 x p3>) into `ptr addrspace(3) undef`, addrspace 3)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -1290,53 +1309,54 @@ define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 {
 define amdgpu_kernel void @test_call_external_f16_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_f16_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_f16_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `ptr addrspace(1) undef`, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -1348,54 +1368,55 @@ define amdgpu_kernel void @test_call_external_f16_func_void() #0 {
 define amdgpu_kernel void @test_call_external_f32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_f32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_f32_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN-NEXT:   G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+  ; GCN-NEXT:   G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
   %val = call float @external_f32_func_void()
   store volatile float %val, ptr addrspace(1) undef
@@ -1405,54 +1426,55 @@ define amdgpu_kernel void @test_call_external_f32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_f64_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_f64_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_f64_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -1464,57 +1486,58 @@ define amdgpu_kernel void @test_call_external_f64_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v2f64_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2f64_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
-  ; GCN-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
+  ; GCN-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32)
   ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (volatile store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1)
@@ -1527,54 +1550,55 @@ define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v2i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2i32_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (volatile store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -1586,55 +1610,56 @@ define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v3i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3i32_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -1646,56 +1671,57 @@ define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v4i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v4i32_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (volatile store (<4 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -1707,57 +1733,58 @@ define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v5i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v5i32_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v5i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -1769,60 +1796,61 @@ define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v8i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v8i32_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v8i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GCN-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GCN-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GCN-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GCN-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (volatile store (<8 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -1834,68 +1862,69 @@ define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v16i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v16i32_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v16i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GCN-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GCN-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; GCN-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; GCN-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; GCN-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; GCN-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; GCN-NEXT:   [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; GCN-NEXT:   [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; GCN-NEXT:   [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; GCN-NEXT:   [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GCN-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GCN-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GCN-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; GCN-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; GCN-NEXT:   [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; GCN-NEXT:   [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; GCN-NEXT:   [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; GCN-NEXT:   [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; GCN-NEXT:   [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; GCN-NEXT:   [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (volatile store (<16 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -1907,84 +1936,85 @@ define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v32i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v32i32_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v32i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GCN-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GCN-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; GCN-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; GCN-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; GCN-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; GCN-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; GCN-NEXT:   [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; GCN-NEXT:   [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; GCN-NEXT:   [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; GCN-NEXT:   [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; GCN-NEXT:   [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; GCN-NEXT:   [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; GCN-NEXT:   [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; GCN-NEXT:   [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; GCN-NEXT:   [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; GCN-NEXT:   [[COPY40:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; GCN-NEXT:   [[COPY41:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; GCN-NEXT:   [[COPY42:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; GCN-NEXT:   [[COPY43:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; GCN-NEXT:   [[COPY44:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; GCN-NEXT:   [[COPY45:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; GCN-NEXT:   [[COPY46:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; GCN-NEXT:   [[COPY47:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; GCN-NEXT:   [[COPY48:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; GCN-NEXT:   [[COPY49:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; GCN-NEXT:   [[COPY50:%[0-9]+]]:_(s32) = COPY $vgpr31
-  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32), [[COPY37]](s32), [[COPY38]](s32), [[COPY39]](s32), [[COPY40]](s32), [[COPY41]](s32), [[COPY42]](s32), [[COPY43]](s32), [[COPY44]](s32), [[COPY45]](s32), [[COPY46]](s32), [[COPY47]](s32), [[COPY48]](s32), [[COPY49]](s32), [[COPY50]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GCN-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GCN-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GCN-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; GCN-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; GCN-NEXT:   [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; GCN-NEXT:   [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; GCN-NEXT:   [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; GCN-NEXT:   [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; GCN-NEXT:   [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; GCN-NEXT:   [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; GCN-NEXT:   [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; GCN-NEXT:   [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; GCN-NEXT:   [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; GCN-NEXT:   [[COPY40:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; GCN-NEXT:   [[COPY41:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; GCN-NEXT:   [[COPY42:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; GCN-NEXT:   [[COPY43:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; GCN-NEXT:   [[COPY44:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; GCN-NEXT:   [[COPY45:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; GCN-NEXT:   [[COPY46:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; GCN-NEXT:   [[COPY47:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; GCN-NEXT:   [[COPY48:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; GCN-NEXT:   [[COPY49:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; GCN-NEXT:   [[COPY50:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; GCN-NEXT:   [[COPY51:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; GCN-NEXT:   [[COPY52:%[0-9]+]]:_(s32) = COPY $vgpr31
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32), [[COPY37]](s32), [[COPY38]](s32), [[COPY39]](s32), [[COPY40]](s32), [[COPY41]](s32), [[COPY42]](s32), [[COPY43]](s32), [[COPY44]](s32), [[COPY45]](s32), [[COPY46]](s32), [[COPY47]](s32), [[COPY48]](s32), [[COPY49]](s32), [[COPY50]](s32), [[COPY51]](s32), [[COPY52]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -1996,54 +2026,55 @@ define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v2i16_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2i16_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN-NEXT:   G_STORE [[COPY19]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1)
+  ; GCN-NEXT:   G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
   %val = call <2 x i16> @external_v2i16_func_void()
   store volatile <2 x i16> %val, ptr addrspace(1) undef
@@ -2053,54 +2084,55 @@ define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v3i16_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3i16_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; GCN-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+  ; GCN-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
   ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
   ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2114,54 +2146,55 @@ define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v4i16_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v4i16_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; GCN-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+  ; GCN-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -2173,54 +2206,55 @@ define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v2f16_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2f16_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN-NEXT:   G_STORE [[COPY19]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1)
+  ; GCN-NEXT:   G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
   %val = call <2 x half> @external_v2f16_func_void()
   store volatile <2 x half> %val, ptr addrspace(1) undef
@@ -2230,54 +2264,55 @@ define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v3f16_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3f16_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; GCN-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+  ; GCN-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
   ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
   ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2291,54 +2326,55 @@ define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v4f16_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v4f16_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; GCN-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+  ; GCN-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -2350,55 +2386,56 @@ define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v3f32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3f32_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `ptr addrspace(1) undef`, align 16, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -2410,57 +2447,58 @@ define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v5f32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v5f32_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v5f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `ptr addrspace(1) undef`, align 32, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -2473,57 +2511,58 @@ define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i32_i64_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_i64_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN-NEXT:   G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+  ; GCN-NEXT:   G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
   ; GCN-NEXT:   G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
   %val = call { i32, i64 } @external_i32_i64_func_void()
@@ -2562,56 +2601,57 @@ define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 {
 define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_a2i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_a2i32_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_a2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN-NEXT:   G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
-  ; GCN-NEXT:   G_STORE [[COPY20]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+  ; GCN-NEXT:   G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+  ; GCN-NEXT:   G_STORE [[COPY22]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
   %val = call [2 x i32] @external_a2i32_func_void()
   %val.0 = extractvalue [2 x i32] %val, 0
@@ -2624,65 +2664,66 @@ define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_a5i8_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_a5i8_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_a5i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
-  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
   ; GCN-NEXT:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
-  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32)
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32)
   ; GCN-NEXT:   [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16)
-  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN-NEXT:   [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32)
   ; GCN-NEXT:   [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16)
-  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN-NEXT:   [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32)
+  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32)
   ; GCN-NEXT:   [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16)
-  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN-NEXT:   [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32)
+  ; GCN-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN-NEXT:   [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY25]](s32)
   ; GCN-NEXT:   [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16)
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN-NEXT:   G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1)
@@ -2708,51 +2749,52 @@ define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v32i32_i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v32i32_i32_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; GCN-NEXT:   $vgpr0 = COPY [[FRAME_INDEX]](p5)
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2774,51 +2816,52 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i32_v32i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_v32i32_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; GCN-NEXT:   $vgpr0 = COPY [[FRAME_INDEX]](p5)
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_v32i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2840,51 +2883,52 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v33i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v33i32_func_void
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; GCN-NEXT:   $vgpr0 = COPY [[FRAME_INDEX]](p5)
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2899,17 +2943,18 @@ define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspace(1) %p, i32 %idx) #0 {
   ; GCN-LABEL: name: test_call_external_v33i32_func_v33i32_i32
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; GCN-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.p.kernarg.offset1, align 16, addrspace 4)
@@ -2919,40 +2964,40 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspa
   ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v33i32_func_v33i32_i32
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-  ; GCN-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1)
   ; GCN-NEXT:   $vgpr0 = COPY [[FRAME_INDEX]](p5)
   ; GCN-NEXT:   $vgpr1 = COPY [[UV]](s32)
   ; GCN-NEXT:   $vgpr2 = COPY [[UV1]](s32)
   ; GCN-NEXT:   $vgpr3 = COPY [[LOAD1]](s32)
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
index 9595b86e4bf63..8fb54005c07f4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
@@ -6,17 +6,18 @@ declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(p
 define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
   ; GCN-LABEL: name: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32
   ; GCN: bb.1 (%ir-block.1):
-  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
   ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -29,24 +30,24 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
   ; GCN-NEXT:   G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.in.gep1, addrspace 5)
   ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; GCN-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64)
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; GCN-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32)
-  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GCN-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32)
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
   ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; GCN-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
   ; GCN-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
@@ -54,16 +55,16 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
   ; GCN-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
   ; GCN-NEXT:   G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.in.val, align 4, addrspace 5)
   ; GCN-NEXT:   $vgpr0 = COPY [[FRAME_INDEX1]](p5)
-  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
-  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 8, implicit-def $scc

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index 2a45ef825bf20..b77b6609b0f8a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -97,48 +97,49 @@ declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8,
 define amdgpu_kernel void @test_call_external_void_func_void() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_void
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_void
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -173,12 +174,12 @@ define void @test_func_call_external_void_func_void() #0 {
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_void
   ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -207,50 +208,51 @@ define void @test_func_call_external_void_func_void() #0 {
 define amdgpu_kernel void @test_call_external_void_func_empty_struct() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_empty_struct
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_empty_struct
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   $vgpr0 = COPY [[C]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_empty_struct, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -262,50 +264,51 @@ define amdgpu_kernel void @test_call_external_void_func_empty_struct() #0 {
 define amdgpu_kernel void @test_call_external_void_func_empty_array() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_empty_array
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_empty_array
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   $vgpr0 = COPY [[C]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_empty_array, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -317,51 +320,52 @@ define amdgpu_kernel void @test_call_external_void_func_empty_array() #0 {
 define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i1_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s1)
   ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -373,53 +377,54 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i1_signext
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1_signext
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1)
   ; CHECK-NEXT:   $vgpr0 = COPY [[SEXT]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -432,53 +437,54 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i1_zeroext
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1_zeroext
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1)
   ; CHECK-NEXT:   $vgpr0 = COPY [[ZEXT]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -491,53 +497,54 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i8_imm
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 123
   ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[C]](s8)
   ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
   ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT1]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -549,54 +556,55 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i8_signext
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8_signext
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[LOAD]](s8)
   ; CHECK-NEXT:   [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[SEXT]](s16)
   ; CHECK-NEXT:   $vgpr0 = COPY [[SEXT1]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -609,54 +617,55 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i8_zeroext
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8_zeroext
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[LOAD]](s8)
   ; CHECK-NEXT:   [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ZEXT]](s16)
   ; CHECK-NEXT:   $vgpr0 = COPY [[ZEXT1]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -669,51 +678,52 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i16_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 123
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16)
   ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -725,53 +735,54 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i16_signext
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_signext
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16)
   ; CHECK-NEXT:   $vgpr0 = COPY [[SEXT]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -784,53 +795,54 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i16_zeroext
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_zeroext
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16)
   ; CHECK-NEXT:   $vgpr0 = COPY [[ZEXT]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -843,51 +855,52 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i32_imm
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
   ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   $vgpr0 = COPY [[C]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -937,52 +950,53 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg
 define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i64_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 123
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i64
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -994,55 +1008,56 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v2i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `ptr addrspace(1) null`, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i64
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
   ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
   ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1055,56 +1070,57 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v2i64_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187
   ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i64
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C2]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
   ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
   ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1116,55 +1132,56 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i48
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48)
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1177,55 +1194,56 @@ define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i48_signext
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48_signext
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48)
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48_signext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1238,55 +1256,56 @@ define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i48_zeroext
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48_zeroext
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48)
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48_zeroext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1299,53 +1318,54 @@ define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_p0_imm(ptr %arg) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_p0_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.arg.kernarg.offset1, align 16, addrspace 4)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_p0
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p0)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1357,55 +1377,56 @@ define amdgpu_kernel void @test_call_external_void_func_p0_imm(ptr %arg) #0 {
 define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v2p0
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x p0>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x p0>) from `ptr addrspace(1) null`, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2p0
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x p0>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
   ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
   ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1418,17 +1439,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v3i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
@@ -1437,24 +1459,24 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
   ; CHECK-NEXT:   [[SHUF:%[0-9]+]]:_(<3 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i64
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C2]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<3 x s64>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
@@ -1463,16 +1485,16 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
   ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
   ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
   ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1487,17 +1509,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v4i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187
@@ -1506,24 +1529,24 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
   ; CHECK-NEXT:   [[SHUF:%[0-9]+]]:_(<4 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2, 3)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i64
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<4 x s64>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
@@ -1534,16 +1557,16 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
   ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
   ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
   ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1557,51 +1580,52 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
 define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_f16_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4400
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f16
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16)
   ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1613,50 +1637,51 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_f32_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f32
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   $vgpr0 = COPY [[C]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1668,54 +1693,55 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v2f32_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
   ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f32
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C2]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1727,56 +1753,57 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v3f32_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
   ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f32
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
   ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1788,17 +1815,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v5f32_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
@@ -1807,24 +1835,24 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
   ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5f32
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C5]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C5]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C6]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C6]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C7]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C7]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
@@ -1832,16 +1860,16 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
   ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
   ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
   ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1853,52 +1881,53 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_f64_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f64
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
-  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1910,56 +1939,57 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v2f64_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00
   ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f64
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C2]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
   ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
   ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1971,41 +2001,42 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v3f64_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_FCONSTANT double 8.000000e+00
   ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64), [[C2]](s64)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f64
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s64>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
@@ -2014,16 +2045,16 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
   ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
   ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
   ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2035,51 +2066,52 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v2i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i16
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   $vgpr0 = COPY [[LOAD]](<2 x s16>)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2092,56 +2124,57 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v3i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `ptr addrspace(1) undef`, align 8, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i16
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>)
-  ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF3]](s16)
+  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF2]](s16)
   ; CHECK-NEXT:   [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV3]](<2 x s16>)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV4]](<2 x s16>)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2154,56 +2187,57 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v3f16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `ptr addrspace(1) undef`, align 8, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f16
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>)
-  ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF3]](s16)
+  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF2]](s16)
   ; CHECK-NEXT:   [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV3]](<2 x s16>)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV4]](<2 x s16>)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2216,53 +2250,54 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v4i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s16>) from `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i16
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](<2 x s16>)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2275,17 +2310,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v4i16_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 3
@@ -2293,38 +2329,38 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
   ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i16
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C4]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C5]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C6]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](<2 x s16>)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2336,57 +2372,58 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v5i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<5 x s16>) from `ptr addrspace(1) undef`, align 16, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5i16
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<5 x s16>)
-  ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF3]](s16)
+  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF2]](s16)
   ; CHECK-NEXT:   [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x s16>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV5]](<2 x s16>)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV6]](<2 x s16>)
   ; CHECK-NEXT:   $vgpr2 = COPY [[UV7]](<2 x s16>)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2399,58 +2436,59 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v7i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<7 x s16>) from `ptr addrspace(1) undef`, align 16, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v7i16
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<7 x s16>)
-  ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF3]](s16)
+  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF2]](s16)
   ; CHECK-NEXT:   [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s16>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV7]](<2 x s16>)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV8]](<2 x s16>)
   ; CHECK-NEXT:   $vgpr2 = COPY [[UV9]](<2 x s16>)
   ; CHECK-NEXT:   $vgpr3 = COPY [[UV10]](<2 x s16>)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v7i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2463,43 +2501,44 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v63i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<63 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<63 x s16>) from `ptr addrspace(1) undef`, align 128, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v63i16
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<63 x s16>)
-  ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<64 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[DEF3]](s16)
+  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<64 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[DEF2]](s16)
   ; CHECK-NEXT:   [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<64 x s16>)
   ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
@@ -2536,16 +2575,16 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 {
   ; CHECK-NEXT:   $vgpr28 = COPY [[UV91]](<2 x s16>)
   ; CHECK-NEXT:   $vgpr29 = COPY [[UV92]](<2 x s16>)
   ; CHECK-NEXT:   $vgpr30 = COPY [[UV93]](<2 x s16>)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v63i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 4, implicit-def $scc
@@ -2558,43 +2597,44 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v65i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<65 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<65 x s16>) from `ptr addrspace(1) undef`, align 256, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v65i16
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<65 x s16>)
-  ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<66 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16), [[DEF3]](s16)
+  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<66 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16), [[DEF2]](s16)
   ; CHECK-NEXT:   [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>), [[UV95:%[0-9]+]]:_(<2 x s16>), [[UV96:%[0-9]+]]:_(<2 x s16>), [[UV97:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<66 x s16>)
   ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
@@ -2634,16 +2674,16 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 {
   ; CHECK-NEXT:   $vgpr28 = COPY [[UV93]](<2 x s16>)
   ; CHECK-NEXT:   $vgpr29 = COPY [[UV94]](<2 x s16>)
   ; CHECK-NEXT:   $vgpr30 = COPY [[UV95]](<2 x s16>)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v65i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 8, implicit-def $scc
@@ -2656,39 +2696,40 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v66i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<66 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<66 x s16>) from `ptr addrspace(1) undef`, align 256, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v66i16
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<66 x s16>)
   ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
@@ -2729,16 +2770,16 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 {
   ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](<2 x s16>)
   ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](<2 x s16>)
   ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](<2 x s16>)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v66i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 8, implicit-def $scc
@@ -2751,51 +2792,52 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v2f16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f16
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   $vgpr0 = COPY [[LOAD]](<2 x s16>)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2808,53 +2850,54 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v2i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s32>) from `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i32
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2867,54 +2910,55 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v2i32_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
   ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i32
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C2]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2926,17 +2970,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v3i32_imm
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
@@ -2944,39 +2989,39 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
   ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i32
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
   ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2988,17 +3033,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v3i32_i32
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
@@ -3007,40 +3053,40 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
   ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i32_i32
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C4]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C5]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C6]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
   ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
   ; CHECK-NEXT:   $vgpr3 = COPY [[C3]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -3052,55 +3098,56 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v4i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s32>) from `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i32
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
   ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
   ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -3113,17 +3160,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v4i32_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
@@ -3131,40 +3179,40 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
   ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i32
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C4]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C5]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C6]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
   ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
   ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -3176,17 +3224,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v5i32_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
@@ -3195,24 +3244,24 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
   ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5i32
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C5]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C5]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C6]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C6]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C7]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C7]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
@@ -3220,16 +3269,16 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
   ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
   ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
   ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -3241,40 +3290,41 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v8i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
   ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s32>) from %ir.ptr, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i32
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
@@ -3285,16 +3335,16 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
   ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
   ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
   ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -3308,17 +3358,18 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v8i32_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
@@ -3330,24 +3381,24 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
   ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32), [[C5]](s32), [[C6]](s32), [[C7]](s32)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i32
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C8]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C8]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C9]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C9]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C10]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C10]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
@@ -3358,16 +3409,16 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
   ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
   ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
   ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -3379,40 +3430,41 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v16i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
   ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s32>) from %ir.ptr, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i32
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>)
   ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
@@ -3431,16 +3483,16 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
   ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](s32)
   ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](s32)
   ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v16i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -3454,40 +3506,41 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v32i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
   ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
   ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
@@ -3525,16 +3578,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
   ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
   ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
   ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 4, implicit-def $scc
@@ -3548,17 +3601,18 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v32i32_i32
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
@@ -3567,24 +3621,24 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
   ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s32) from `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i32
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
   ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
@@ -3625,16 +3679,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
   ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
   ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
   ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF2]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF3]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 8, implicit-def $scc
@@ -3649,17 +3703,18 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v32i32_i8_i8_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
@@ -3668,24 +3723,24 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
   ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i8_i8_i16
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
   ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
@@ -3696,10 +3751,10 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
   ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
   ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32)
   ; CHECK-NEXT:   G_STORE [[ANYEXT]](s16), [[PTR_ADD2]](p5) :: (store (s16) into stack + 4, align 4, addrspace 5)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16)
   ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
   ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](s32)
-  ; CHECK-NEXT:   G_STORE [[COPY18]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5)
+  ; CHECK-NEXT:   G_STORE [[COPY20]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5)
   ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
   ; CHECK-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32)
   ; CHECK-NEXT:   G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5)
@@ -3734,16 +3789,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
   ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
   ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
   ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
-  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF2]](p4)
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF3]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i8_i8_i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 16, implicit-def $scc
@@ -3760,17 +3815,18 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v32i32_p3_p5
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
@@ -3779,24 +3835,24 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 {
   ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(p5) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (p5) from `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_p3_p5
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
   ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
@@ -3840,16 +3896,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 {
   ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
   ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
   ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF2]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF3]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_p3_p5, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 12, implicit-def $scc
@@ -3865,17 +3921,18 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 {
 define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_struct_i8_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
   ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (s8) from %ir.ptr0, align 4, addrspace 1)
@@ -3884,39 +3941,39 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
   ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: ("amdgpu-noclobber" load (s32) from %ir.ptr0 + 4, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_struct_i8_i32
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8)
   ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
   ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT1]](s32)
   ; CHECK-NEXT:   $vgpr1 = COPY [[LOAD2]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -3982,17 +4039,18 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #
 define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_byval_struct_i8_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
   ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.val
@@ -4002,40 +4060,40 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
   ; CHECK-NEXT:   G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_byval_struct_i8_i32
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
   ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32)
   ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
   ; CHECK-NEXT:   G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.val, align 4, addrspace 5)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_byval_struct_i8_i32, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 8, implicit-def $scc
@@ -4063,7 +4121,7 @@ define void @call_byval_3ai32_byval_i8_align32(ptr addrspace(5) %incoming0, ptr
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr0
   ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p5) = COPY $vgpr1
@@ -4071,7 +4129,7 @@ define void @call_byval_3ai32_byval_i8_align32(ptr addrspace(5) %incoming0, ptr
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a3i32_byval_i8_align32
   ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -4123,13 +4181,13 @@ define void @call_byval_a4i64_align4_higher_source_align(ptr addrspace(5) align
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr0
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a4i64_align4
   ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -4163,40 +4221,41 @@ define void @call_byval_a4i64_align4_higher_source_align(ptr addrspace(5) align
 define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v2i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
   ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<2 x s8>) from %ir.ptr, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i8
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<2 x s8>)
   ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
@@ -4205,16 +4264,16 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 {
   ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT2]](s32)
   ; CHECK-NEXT:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16)
   ; CHECK-NEXT:   $vgpr1 = COPY [[ANYEXT3]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -4228,40 +4287,41 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v3i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
   ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<3 x s8>) from %ir.ptr, align 4, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i8
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<3 x s8>)
   ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
@@ -4273,16 +4333,16 @@ define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 {
   ; CHECK-NEXT:   $vgpr1 = COPY [[ANYEXT4]](s32)
   ; CHECK-NEXT:   [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16)
   ; CHECK-NEXT:   $vgpr2 = COPY [[ANYEXT5]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -4296,40 +4356,41 @@ define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v4i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
   ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<4 x s8>) from %ir.ptr, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i8
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>)
   ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
@@ -4344,16 +4405,16 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 {
   ; CHECK-NEXT:   $vgpr2 = COPY [[ANYEXT6]](s32)
   ; CHECK-NEXT:   [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16)
   ; CHECK-NEXT:   $vgpr3 = COPY [[ANYEXT7]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -4367,40 +4428,41 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v8i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
   ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s8>) from %ir.ptr, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i8
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<8 x s8>)
   ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
@@ -4427,16 +4489,16 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 {
   ; CHECK-NEXT:   $vgpr6 = COPY [[ANYEXT14]](s32)
   ; CHECK-NEXT:   [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16)
   ; CHECK-NEXT:   $vgpr7 = COPY [[ANYEXT15]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -4450,40 +4512,41 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v16i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
   ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s8>) from %ir.ptr, addrspace 1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i8
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>)
   ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
@@ -4534,16 +4597,16 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
   ; CHECK-NEXT:   $vgpr14 = COPY [[ANYEXT30]](s32)
   ; CHECK-NEXT:   [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16)
   ; CHECK-NEXT:   $vgpr15 = COPY [[ANYEXT31]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v16i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -4557,17 +4620,18 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
 define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
   ; CHECK-LABEL: name: stack_passed_arg_alignment_v32i32_f64
   ; CHECK: bb.1.entry:
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.val.kernarg.offset1, align 16, addrspace 4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
@@ -4575,24 +4639,24 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
   ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64) from %ir.tmp.kernarg.offset, align 16, addrspace 4)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @stack_passed_f64_arg
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136
-  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<32 x s32>)
   ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
@@ -4637,16 +4701,16 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
   ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
   ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
   ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @stack_passed_f64_arg, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 12, implicit-def $scc
@@ -4668,7 +4732,7 @@ define void @stack_12xv3i32() #0 {
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32)
@@ -4701,7 +4765,7 @@ define void @stack_12xv3i32() #0 {
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3i32
   ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -4811,7 +4875,7 @@ define void @stack_12xv3f32() #0 {
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
   ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32)
@@ -4844,7 +4908,7 @@ define void @stack_12xv3f32() #0 {
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3f32
   ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -4954,7 +5018,7 @@ define void @stack_8xv5i32() #0 {
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
@@ -4983,7 +5047,7 @@ define void @stack_8xv5i32() #0 {
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5i32
   ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -5097,7 +5161,7 @@ define void @stack_8xv5f32() #0 {
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
   ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
@@ -5126,7 +5190,7 @@ define void @stack_8xv5f32() #0 {
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5f32
   ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
index d0d1e47707cd0..72cf3a08e7ffb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
@@ -4,49 +4,50 @@
 define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) {
   ; CHECK-LABEL: name: test_indirect_call_sgpr_ptr
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.fptr.kernarg.offset1, align 16, addrspace 4)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
   ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[LOAD]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
index 932d435bef2dc..967979e76dd8a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -816,7 +816,7 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
   ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -933,7 +933,7 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
   ; GCN-NEXT:   G_STORE [[C1]](s64), [[PTR_ADD2]](p5) :: (store (s64) into %ir.alloca1 + 8, addrspace 5)
   ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_multi_byval
   ; GCN-NEXT:   [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN-NEXT:   [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; GCN-NEXT:   [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GCN-NEXT:   [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; GCN-NEXT:   [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; GCN-NEXT:   [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -984,7 +984,7 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64
   ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1097,7 +1097,7 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64
   ; GCN-NEXT:   G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into %ir.alloca + 8, addrspace 5)
   ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_byval_and_stack_passed
   ; GCN-NEXT:   [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN-NEXT:   [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; GCN-NEXT:   [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GCN-NEXT:   [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; GCN-NEXT:   [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; GCN-NEXT:   [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -1176,14 +1176,14 @@ define hidden fastcc i64 @sibling_call_i64_fastcc_i64(i64 %a) #1 {
   ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32)
   ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i64_fastcc_i64
   ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -1225,14 +1225,14 @@ define hidden fastcc ptr addrspace(1) @sibling_call_p1i8_fastcc_p1i8(ptr addrspa
   ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; GCN-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32)
   ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @p1i8_fastcc_p1i8
   ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -1274,13 +1274,13 @@ define hidden fastcc i16 @sibling_call_i16_fastcc_i16(i16 %a) #1 {
   ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
   ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i16_fastcc_i16
   ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -1321,13 +1321,13 @@ define hidden fastcc half @sibling_call_f16_fastcc_f16(half %a) #1 {
   ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
   ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @f16_fastcc_f16
   ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -1368,7 +1368,7 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1
   ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
   ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1377,7 +1377,7 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1
   ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
   ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v3i16_fastcc_v3i16
   ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -1422,14 +1422,14 @@ define hidden fastcc <4 x i16> @sibling_call_v4i16_fastcc_v4i16(<4 x i16> %a) #1
   ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
   ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
   ; GCN-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>)
   ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v4i16_fastcc_v4i16
   ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -1471,7 +1471,7 @@ define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) #1
   ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1482,7 +1482,7 @@ define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) #1
   ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
   ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v2i64_fastcc_v2i64
   ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY4]]

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
index 9bc0af2fc9136..b9cdcff95ed1e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
@@ -15,11 +15,11 @@ define void @tail_call_void_func_void() {
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
   ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
   ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void
   ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
   ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
   ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll
index 0e1e1730b4b5e..ea0376afe8ec0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll
@@ -12,11 +12,9 @@ define void @func_use_lds_global() {
 ; GFX8-LABEL: func_use_lds_global:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b64 s[4:5], 0xc8
 ; GFX8-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX8-NEXT:    s_mov_b32 m0, -1
-; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_mov_b64 s[0:1], s[6:7]
 ; GFX8-NEXT:    s_trap 2
 ; GFX8-NEXT:    ds_write_b32 v0, v0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
@@ -39,9 +37,7 @@ define void @func_use_lds_global_constexpr_cast() {
 ; GFX8-LABEL: func_use_lds_global_constexpr_cast:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b64 s[4:5], 0xc8
-; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_mov_b64 s[0:1], s[6:7]
 ; GFX8-NEXT:    s_trap 2
 ; GFX8-NEXT:    flat_store_dword v[0:1], v0
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
index 69f9b0a2709ce..cb4989c3ee2d0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
@@ -209,16 +209,19 @@ body: |
     liveins: $vgpr0
 
     ; VI-LABEL: name: test_addrspacecast_p5_to_p0
-    ; VI: liveins: $vgpr0
+    ; VI: liveins: $vgpr0, $sgpr4_sgpr5
     ; VI-NEXT: {{  $}}
-    ; VI-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 228
-    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[C]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
-    ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5)
+    ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY $vgpr0
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
+    ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p5)
     ; VI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
     ; VI-NEXT: [[C1:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1
     ; VI-NEXT: [[C2:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
-    ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p5), [[C1]]
+    ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p5), [[C1]]
     ; VI-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C2]]
     ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
     ; GFX9-LABEL: name: test_addrspacecast_p5_to_p0
@@ -301,16 +304,19 @@ body: |
     liveins: $vgpr0
 
     ; VI-LABEL: name: test_addrspacecast_p3_to_p0
-    ; VI: liveins: $vgpr0
+    ; VI: liveins: $vgpr0, $sgpr4_sgpr5
     ; VI-NEXT: {{  $}}
-    ; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 232
-    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[C]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
-    ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
+    ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 64, addrspace 4)
+    ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3)
     ; VI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
     ; VI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
     ; VI-NEXT: [[C2:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
-    ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[C1]]
+    ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p3), [[C1]]
     ; VI-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C2]]
     ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
     ; GFX9-LABEL: name: test_addrspacecast_p3_to_p0
@@ -533,20 +539,24 @@ body: |
     liveins: $vgpr0_vgpr1
 
     ; VI-LABEL: name: test_addrspacecast_v2p3_to_v2p0
-    ; VI: liveins: $vgpr0_vgpr1
+    ; VI: liveins: $vgpr0_vgpr1, $sgpr4_sgpr5
     ; VI-NEXT: {{  $}}
-    ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
-    ; VI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>)
-    ; VI-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 232
-    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p4) = COPY [[C]](p4)
-    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
+    ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
+    ; VI-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 64, addrspace 4)
     ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
     ; VI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
     ; VI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
     ; VI-NEXT: [[C2:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
     ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C1]]
     ; VI-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C2]]
-    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[C]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
+    ; VI-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY]](p4)
+    ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY3]], [[C]](s64)
+    ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 64, addrspace 4)
     ; VI-NEXT: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
     ; VI-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[LOAD1]](s32)
     ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C1]]
@@ -754,13 +764,18 @@ stack:
 body: |
   bb.0:
     ; VI-LABEL: name: test_addrspacecast_p5_fi_to_p0
-    ; VI: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
-    ; VI-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 228
-    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[C]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
+    ; VI: liveins: $sgpr4_sgpr5
+    ; VI-NEXT: {{  $}}
+    ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
+    ; VI-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p4) = COPY [[COPY]](p4)
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
+    ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY1]], [[C]](s64)
+    ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
     ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[FRAME_INDEX]](p5)
     ; VI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
-    ; VI-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY [[MV]](p0)
-    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](p0)
+    ; VI-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[MV]](p0)
+    ; VI-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](p0)
     ; GFX9-LABEL: name: test_addrspacecast_p5_fi_to_p0
     ; GFX9: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
     ; GFX9-NEXT: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 30735

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
index 46ae9d63ebaec..b2e4d6787b74c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
@@ -9,8 +9,7 @@
 define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) {
 ; CI-LABEL: is_private_vgpr:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; CI-NEXT:    s_load_dword s2, s[4:5], 0x32
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
 ; CI-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    v_mov_b32_e32 v0, s0
@@ -19,7 +18,9 @@ define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) {
 ; CI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; CI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1] glc
 ; CI-NEXT:    s_waitcnt vmcnt(0)
-; CI-NEXT:    v_cmp_eq_u32_e32 vcc, s2, v1
+; CI-NEXT:    s_load_dword s0, s[4:5], 0x11
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    v_cmp_eq_u32_e32 vcc, s0, v1
 ; CI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; CI-NEXT:    flat_store_dword v[0:1], v0
 ; CI-NEXT:    s_endpgm
@@ -80,9 +81,9 @@ define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) {
 define amdgpu_kernel void @is_private_sgpr(ptr %ptr) {
 ; CI-LABEL: is_private_sgpr:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    s_load_dword s0, s[4:5], 0x32
+; CI-NEXT:    s_load_dword s0, s[4:5], 0x11
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    s_cmp_lg_u32 s1, s0
 ; CI-NEXT:    s_cbranch_scc1 .LBB1_2

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
index 76c4023e2f688..048fb5586ac0d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
@@ -9,8 +9,7 @@
 define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
 ; CI-LABEL: is_local_vgpr:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; CI-NEXT:    s_load_dword s2, s[4:5], 0x33
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
 ; CI-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    v_mov_b32_e32 v0, s0
@@ -19,7 +18,9 @@ define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
 ; CI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; CI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1] glc
 ; CI-NEXT:    s_waitcnt vmcnt(0)
-; CI-NEXT:    v_cmp_eq_u32_e32 vcc, s2, v1
+; CI-NEXT:    s_load_dword s0, s[4:5], 0x10
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    v_cmp_eq_u32_e32 vcc, s0, v1
 ; CI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; CI-NEXT:    flat_store_dword v[0:1], v0
 ; CI-NEXT:    s_endpgm
@@ -80,9 +81,9 @@ define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
 define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
 ; CI-LABEL: is_local_sgpr:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    s_load_dword s0, s[4:5], 0x33
+; CI-NEXT:    s_load_dword s0, s[4:5], 0x10
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    s_cmp_lg_u32 s1, s0
 ; CI-NEXT:    s_cbranch_scc1 .LBB1_2

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
index 15573de41b517..fec564f9b45db 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
@@ -75,8 +75,8 @@ bb.2:
   store volatile i32 0, ptr addrspace(1) undef
   ret void
 }
-; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 16
-; DEFAULTSIZE: ; ScratchSize: 16
+; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4112
+; DEFAULTSIZE: ; ScratchSize: 4112
 
 ; ASSUME1024: .amdhsa_private_segment_fixed_size 1040
 ; ASSUME1024: ; ScratchSize: 1040
@@ -137,8 +137,8 @@ bb.1:
   ret void
 }
 
-; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 64
-; DEFAULTSIZE: ; ScratchSize: 64
+; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4160
+; DEFAULTSIZE: ; ScratchSize: 4160
 
 ; ASSUME1024: .amdhsa_private_segment_fixed_size 1088
 ; ASSUME1024: ; ScratchSize: 1088
@@ -265,6 +265,3 @@ bb.1:
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 attributes #0 = { nounwind readnone speculatable }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; ASSUME1024: {{.*}}
-; DEFAULTSIZE: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
index e1f3a15a3483a..b3be3702b3f52 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
@@ -231,5 +231,5 @@ attributes #1 = { nounwind }
 ;.
 ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ;.

diff  --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index f97bbafab5d9c..163bd3ee063fb 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -458,7 +458,7 @@ define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
-; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR12:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR8]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)*
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -477,7 +477,7 @@ define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_gfx9
-; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR13:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR12:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)*
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -496,7 +496,7 @@ define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %p
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_queue_ptr_gfx9
-; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR14:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR13:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)*
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    call void @func_indirect_use_queue_ptr()
@@ -515,7 +515,7 @@ define void @indirect_use_group_to_flat_addrspacecast() #1 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR12]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR8]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @use_group_to_flat_addrspacecast(i32 addrspace(3)* null)
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -593,7 +593,7 @@ define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_use_implicitarg_ptr
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR12]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR14:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -611,7 +611,7 @@ define void @use_implicitarg_ptr() #1 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_implicitarg_ptr
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR12]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR14]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -628,7 +628,7 @@ define void @func_indirect_use_implicitarg_ptr() #1 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_implicitarg_ptr
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR12]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR14]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @use_implicitarg_ptr()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -944,13 +944,13 @@ attributes #5 = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }

diff  --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
index a4b7a219b4882..4acf31b249590 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
@@ -431,7 +431,7 @@ define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %pt
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
-; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR12:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR11]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32*
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 0, i32* [[STOF]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -449,7 +449,7 @@ define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_private_to_flat_addrspacecast
-; ATTRIBUTOR_HSA-SAME: (i32 addrspace(5)* [[PTR:%.*]]) #[[ATTR12]] {
+; ATTRIBUTOR_HSA-SAME: (i32 addrspace(5)* [[PTR:%.*]]) #[[ATTR11]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(5)* [[PTR]] to i32*
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 0, i32* [[STOF]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -541,7 +541,7 @@ define amdgpu_kernel void @use_is_shared(i8* %ptr) #1 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_is_shared
-; ATTRIBUTOR_HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR12]] {
+; ATTRIBUTOR_HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR11]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(i8* [[PTR]])
 ; ATTRIBUTOR_HSA-NEXT:    [[EXT:%.*]] = zext i1 [[IS_SHARED]] to i32
 ; ATTRIBUTOR_HSA-NEXT:    store i32 [[EXT]], i32 addrspace(1)* undef, align 4
@@ -562,7 +562,7 @@ define amdgpu_kernel void @use_is_private(i8* %ptr) #1 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_is_private
-; ATTRIBUTOR_HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR12]] {
+; ATTRIBUTOR_HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR11]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(i8* [[PTR]])
 ; ATTRIBUTOR_HSA-NEXT:    [[EXT:%.*]] = zext i1 [[IS_PRIVATE]] to i32
 ; ATTRIBUTOR_HSA-NEXT:    store i32 [[EXT]], i32 addrspace(1)* undef, align 4
@@ -657,6 +657,5 @@ attributes #1 = { nounwind }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ;.

diff  --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
index 1212e21b839ab..996d18c236c83 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
@@ -8,12 +8,12 @@
 @alias = hidden alias void (), void ()* @aliasee_default
 
 ; ALL-LABEL: {{^}}kernel:
-; GFX908: .amdhsa_next_free_vgpr 32
+; GFX908: .amdhsa_next_free_vgpr 41
 ; GFX908-NEXT: .amdhsa_next_free_sgpr 33
 
-; GFX90A: .amdhsa_next_free_vgpr 59
+; GFX90A: .amdhsa_next_free_vgpr 71
 ; GFX90A-NEXT: .amdhsa_next_free_sgpr 33
-; GFX90A-NEXT: .amdhsa_accum_offset 32
+; GFX90A-NEXT: .amdhsa_accum_offset 44
 define amdgpu_kernel void @kernel() #0 {
 bb:
   call void @alias() #2

diff  --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
index cfbef98db3ab8..729e6dfc1cb7e 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
@@ -9,7 +9,7 @@
 ; The parent kernel has a higher VGPR usage than the possible callees.
 
 ; CHECK-LABEL: {{^}}kernel1:
-; CHECK: .amdhsa_next_free_vgpr 41
+; CHECK: .amdhsa_next_free_vgpr 42
 ; CHECK-NEXT: .amdhsa_next_free_sgpr 33
 define amdgpu_kernel void @kernel1() #0 {
 bb:

diff  --git a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
index fe9a6d8b32b89..516657b9560e1 100644
--- a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
@@ -5,19 +5,20 @@
 define amdgpu_kernel void @call_memory_arg_load(i32 addrspace(3)* %ptr, i32) #0 {
 ; GCN-LABEL: call_memory_arg_load:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    s_load_dword s4, s[4:5], 0x0
-; GCN-NEXT:    s_add_u32 flat_scratch_lo, s6, s9
-; GCN-NEXT:    s_addc_u32 flat_scratch_hi, s7, 0
-; GCN-NEXT:    s_add_u32 s0, s0, s9
+; GCN-NEXT:    s_load_dword s6, s[6:7], 0x0
+; GCN-NEXT:    s_add_u32 flat_scratch_lo, s8, s11
+; GCN-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; GCN-NEXT:    s_add_u32 s0, s0, s11
 ; GCN-NEXT:    s_addc_u32 s1, s1, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_mov_b32_e32 v0, s4
+; GCN-NEXT:    v_mov_b32_e32 v0, s6
 ; GCN-NEXT:    ds_read_b32 v0, v0
+; GCN-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; GCN-NEXT:    s_mov_b32 s32, 0
-; GCN-NEXT:    s_getpc_b64 s[4:5]
-; GCN-NEXT:    s_add_u32 s4, s4, func at rel32@lo+4
-; GCN-NEXT:    s_addc_u32 s5, s5, func at rel32@hi+12
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT:    s_getpc_b64 s[8:9]
+; GCN-NEXT:    s_add_u32 s8, s8, func at rel32@lo+4
+; GCN-NEXT:    s_addc_u32 s9, s9, func at rel32@hi+12
+; GCN-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; GCN-NEXT:    s_endpgm
   %vgpr = load volatile i32, i32 addrspace(3)* %ptr
   call void @func(i32 %vgpr)
@@ -28,20 +29,21 @@ define amdgpu_kernel void @call_memory_arg_load(i32 addrspace(3)* %ptr, i32) #0
 define amdgpu_kernel void @call_memory_no_dep(i32 addrspace(1)* %ptr, i32) #0 {
 ; GCN-LABEL: call_memory_no_dep:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GCN-NEXT:    s_add_u32 flat_scratch_lo, s6, s9
-; GCN-NEXT:    s_addc_u32 flat_scratch_hi, s7, 0
-; GCN-NEXT:    s_add_u32 s0, s0, s9
+; GCN-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GCN-NEXT:    s_add_u32 flat_scratch_lo, s8, s11
+; GCN-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; GCN-NEXT:    s_add_u32 s0, s0, s11
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    s_addc_u32 s1, s1, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    global_store_dword v0, v0, s[4:5]
+; GCN-NEXT:    global_store_dword v0, v0, s[6:7]
+; GCN-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    s_mov_b32 s32, 0
-; GCN-NEXT:    s_getpc_b64 s[6:7]
-; GCN-NEXT:    s_add_u32 s6, s6, func at rel32@lo+4
-; GCN-NEXT:    s_addc_u32 s7, s7, func at rel32@hi+12
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GCN-NEXT:    s_getpc_b64 s[8:9]
+; GCN-NEXT:    s_add_u32 s8, s8, func at rel32@lo+4
+; GCN-NEXT:    s_addc_u32 s9, s9, func at rel32@hi+12
+; GCN-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; GCN-NEXT:    s_endpgm
   store i32 0, i32 addrspace(1)* %ptr
   call void @func(i32 0)
@@ -52,18 +54,19 @@ define amdgpu_kernel void @call_memory_no_dep(i32 addrspace(1)* %ptr, i32) #0 {
 define amdgpu_kernel void @call_no_wait_after_call(i32 addrspace(1)* %ptr, i32) #0 {
 ; GCN-LABEL: call_no_wait_after_call:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    s_add_u32 flat_scratch_lo, s6, s9
-; GCN-NEXT:    s_load_dwordx2 s[34:35], s[4:5], 0x0
-; GCN-NEXT:    s_addc_u32 flat_scratch_hi, s7, 0
-; GCN-NEXT:    s_add_u32 s0, s0, s9
+; GCN-NEXT:    s_add_u32 flat_scratch_lo, s8, s11
+; GCN-NEXT:    s_load_dwordx2 s[34:35], s[6:7], 0x0
+; GCN-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; GCN-NEXT:    s_add_u32 s0, s0, s11
 ; GCN-NEXT:    s_addc_u32 s1, s1, 0
+; GCN-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    s_mov_b32 s32, 0
-; GCN-NEXT:    s_getpc_b64 s[4:5]
-; GCN-NEXT:    s_add_u32 s4, s4, func at rel32@lo+4
-; GCN-NEXT:    s_addc_u32 s5, s5, func at rel32@hi+12
+; GCN-NEXT:    s_getpc_b64 s[8:9]
+; GCN-NEXT:    s_add_u32 s8, s8, func at rel32@lo+4
+; GCN-NEXT:    s_addc_u32 s9, s9, func at rel32@hi+12
 ; GCN-NEXT:    v_mov_b32_e32 v40, 0
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; GCN-NEXT:    global_store_dword v40, v40, s[34:35]
 ; GCN-NEXT:    s_endpgm
   call void @func(i32 0)
@@ -74,18 +77,19 @@ define amdgpu_kernel void @call_no_wait_after_call(i32 addrspace(1)* %ptr, i32)
 define amdgpu_kernel void @call_no_wait_after_call_return_val(i32 addrspace(1)* %ptr, i32) #0 {
 ; GCN-LABEL: call_no_wait_after_call_return_val:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    s_add_u32 flat_scratch_lo, s6, s9
-; GCN-NEXT:    s_load_dwordx2 s[34:35], s[4:5], 0x0
-; GCN-NEXT:    s_addc_u32 flat_scratch_hi, s7, 0
-; GCN-NEXT:    s_add_u32 s0, s0, s9
+; GCN-NEXT:    s_add_u32 flat_scratch_lo, s8, s11
+; GCN-NEXT:    s_load_dwordx2 s[34:35], s[6:7], 0x0
+; GCN-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; GCN-NEXT:    s_add_u32 s0, s0, s11
 ; GCN-NEXT:    s_addc_u32 s1, s1, 0
+; GCN-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    s_mov_b32 s32, 0
-; GCN-NEXT:    s_getpc_b64 s[4:5]
-; GCN-NEXT:    s_add_u32 s4, s4, func.return at rel32@lo+4
-; GCN-NEXT:    s_addc_u32 s5, s5, func.return at rel32@hi+12
+; GCN-NEXT:    s_getpc_b64 s[8:9]
+; GCN-NEXT:    s_add_u32 s8, s8, func.return at rel32@lo+4
+; GCN-NEXT:    s_addc_u32 s9, s9, func.return at rel32@hi+12
 ; GCN-NEXT:    v_mov_b32_e32 v40, 0
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; GCN-NEXT:    global_store_dword v40, v0, s[34:35]
 ; GCN-NEXT:    s_endpgm
   %rv = call i32 @func.return(i32 0)
@@ -97,18 +101,19 @@ define amdgpu_kernel void @call_no_wait_after_call_return_val(i32 addrspace(1)*
 define amdgpu_kernel void @call_got_load(i32 addrspace(1)* %ptr, i32) #0 {
 ; GCN-LABEL: call_got_load:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    s_add_u32 flat_scratch_lo, s6, s9
-; GCN-NEXT:    s_addc_u32 flat_scratch_hi, s7, 0
-; GCN-NEXT:    s_add_u32 s0, s0, s9
+; GCN-NEXT:    s_add_u32 flat_scratch_lo, s8, s11
+; GCN-NEXT:    s_addc_u32 flat_scratch_hi, s9, 0
+; GCN-NEXT:    s_add_u32 s0, s0, s11
 ; GCN-NEXT:    s_addc_u32 s1, s1, 0
-; GCN-NEXT:    s_getpc_b64 s[4:5]
-; GCN-NEXT:    s_add_u32 s4, s4, got.func at gotpcrel32@lo+4
-; GCN-NEXT:    s_addc_u32 s5, s5, got.func at gotpcrel32@hi+12
-; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT:    s_getpc_b64 s[6:7]
+; GCN-NEXT:    s_add_u32 s6, s6, got.func at gotpcrel32@lo+4
+; GCN-NEXT:    s_addc_u32 s7, s7, got.func at gotpcrel32@hi+12
+; GCN-NEXT:    s_load_dwordx2 s[8:9], s[6:7], 0x0
+; GCN-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    s_mov_b32 s32, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; GCN-NEXT:    s_endpgm
   call void @got.func(i32 0)
   ret void

diff  --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
index 3224ee693efe9..3f0c627641ac9 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri --amdhsa-code-object-version=4 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
 
 ; GCN-LABEL: {{^}}use_dispatch_ptr:
 ; GCN: s_load_dword s{{[0-9]+}}, s[4:5]

diff  --git a/llvm/test/CodeGen/AMDGPU/cc-update.ll b/llvm/test/CodeGen/AMDGPU/cc-update.ll
index a65a41b452e50..3c23aa5cb5c1b 100644
--- a/llvm/test/CodeGen/AMDGPU/cc-update.ll
+++ b/llvm/test/CodeGen/AMDGPU/cc-update.ll
@@ -68,76 +68,80 @@ entry:
 define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 {
 ; GFX803-LABEL: test_kern_call:
 ; GFX803:       ; %bb.0: ; %entry
-; GFX803-NEXT:    s_add_i32 s10, s10, s15
-; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s10, 8
+; GFX803-NEXT:    s_add_i32 s12, s12, s17
+; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX803-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GFX803-NEXT:    s_add_u32 s0, s0, s15
+; GFX803-NEXT:    s_add_u32 s0, s0, s17
 ; GFX803-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; GFX803-NEXT:    v_or_b32_e32 v0, v0, v1
-; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s11
+; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s13
 ; GFX803-NEXT:    s_addc_u32 s1, s1, 0
-; GFX803-NEXT:    s_mov_b64 s[10:11], s[8:9]
+; GFX803-NEXT:    s_mov_b32 s13, s15
+; GFX803-NEXT:    s_mov_b32 s12, s14
 ; GFX803-NEXT:    v_or_b32_e32 v31, v0, v2
-; GFX803-NEXT:    s_mov_b64 s[8:9], s[6:7]
+; GFX803-NEXT:    s_mov_b32 s14, s16
 ; GFX803-NEXT:    s_mov_b32 s32, 0
-; GFX803-NEXT:    s_getpc_b64 s[16:17]
-; GFX803-NEXT:    s_add_u32 s16, s16, ex at rel32@lo+4
-; GFX803-NEXT:    s_addc_u32 s17, s17, ex at rel32@hi+12
-; GFX803-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX803-NEXT:    s_getpc_b64 s[18:19]
+; GFX803-NEXT:    s_add_u32 s18, s18, ex at rel32@lo+4
+; GFX803-NEXT:    s_addc_u32 s19, s19, ex at rel32@hi+12
+; GFX803-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX803-NEXT:    s_endpgm
 ;
 ; GFX900-LABEL: test_kern_call:
 ; GFX900:       ; %bb.0: ; %entry
-; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s10, s15
-; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
-; GFX900-NEXT:    s_add_u32 s0, s0, s15
+; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; GFX900-NEXT:    s_add_u32 s0, s0, s17
 ; GFX900-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX900-NEXT:    s_addc_u32 s1, s1, 0
-; GFX900-NEXT:    s_mov_b64 s[10:11], s[8:9]
+; GFX900-NEXT:    s_mov_b32 s13, s15
+; GFX900-NEXT:    s_mov_b32 s12, s14
 ; GFX900-NEXT:    v_or3_b32 v31, v0, v1, v2
-; GFX900-NEXT:    s_mov_b64 s[8:9], s[6:7]
+; GFX900-NEXT:    s_mov_b32 s14, s16
 ; GFX900-NEXT:    s_mov_b32 s32, 0
-; GFX900-NEXT:    s_getpc_b64 s[16:17]
-; GFX900-NEXT:    s_add_u32 s16, s16, ex at rel32@lo+4
-; GFX900-NEXT:    s_addc_u32 s17, s17, ex at rel32@hi+12
-; GFX900-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX900-NEXT:    s_getpc_b64 s[18:19]
+; GFX900-NEXT:    s_add_u32 s18, s18, ex at rel32@lo+4
+; GFX900-NEXT:    s_addc_u32 s19, s19, ex at rel32@hi+12
+; GFX900-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX900-NEXT:    s_endpgm
 ;
 ; GFX1010-LABEL: test_kern_call:
 ; GFX1010:       ; %bb.0: ; %entry
-; GFX1010-NEXT:    s_add_u32 s10, s10, s15
+; GFX1010-NEXT:    s_add_u32 s12, s12, s17
 ; GFX1010-NEXT:    s_mov_b32 s32, 0
-; GFX1010-NEXT:    s_addc_u32 s11, s11, 0
-; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
-; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
+; GFX1010-NEXT:    s_addc_u32 s13, s13, 0
+; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
+; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
 ; GFX1010-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; GFX1010-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GFX1010-NEXT:    s_add_u32 s0, s0, s15
+; GFX1010-NEXT:    s_add_u32 s0, s0, s17
 ; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
-; GFX1010-NEXT:    s_mov_b64 s[10:11], s[8:9]
-; GFX1010-NEXT:    s_mov_b64 s[8:9], s[6:7]
+; GFX1010-NEXT:    s_mov_b32 s13, s15
+; GFX1010-NEXT:    s_mov_b32 s12, s14
 ; GFX1010-NEXT:    v_or3_b32 v31, v0, v1, v2
-; GFX1010-NEXT:    s_getpc_b64 s[16:17]
-; GFX1010-NEXT:    s_add_u32 s16, s16, ex at rel32@lo+4
-; GFX1010-NEXT:    s_addc_u32 s17, s17, ex at rel32@hi+12
-; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1010-NEXT:    s_mov_b32 s14, s16
+; GFX1010-NEXT:    s_getpc_b64 s[18:19]
+; GFX1010-NEXT:    s_add_u32 s18, s18, ex at rel32@lo+4
+; GFX1010-NEXT:    s_addc_u32 s19, s19, ex at rel32@hi+12
+; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1010-NEXT:    s_endpgm
 ;
 ; GFX1100-LABEL: test_kern_call:
 ; GFX1100:       ; %bb.0: ; %entry
 ; GFX1100-NEXT:    v_mov_b32_e32 v31, v0
 ; GFX1100-NEXT:    s_mov_b32 s12, s13
-; GFX1100-NEXT:    s_mov_b64 s[10:11], s[4:5]
+; GFX1100-NEXT:    s_mov_b64 s[10:11], s[6:7]
+; GFX1100-NEXT:    s_mov_b64 s[8:9], s[4:5]
 ; GFX1100-NEXT:    s_mov_b64 s[4:5], s[0:1]
-; GFX1100-NEXT:    s_mov_b64 s[8:9], s[2:3]
+; GFX1100-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1100-NEXT:    s_mov_b32 s13, s14
 ; GFX1100-NEXT:    s_mov_b32 s14, s15
 ; GFX1100-NEXT:    s_mov_b32 s32, 0
-; GFX1100-NEXT:    s_getpc_b64 s[6:7]
-; GFX1100-NEXT:    s_add_u32 s6, s6, ex at rel32@lo+4
-; GFX1100-NEXT:    s_addc_u32 s7, s7, ex at rel32@hi+12
-; GFX1100-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GFX1100-NEXT:    s_getpc_b64 s[16:17]
+; GFX1100-NEXT:    s_add_u32 s16, s16, ex at rel32@lo+4
+; GFX1100-NEXT:    s_addc_u32 s17, s17, ex at rel32@hi+12
+; GFX1100-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1100-NEXT:    s_endpgm
 
 entry:
@@ -148,69 +152,72 @@ entry:
 define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
 ; GFX803-LABEL: test_kern_stack_and_call:
 ; GFX803:       ; %bb.0: ; %entry
-; GFX803-NEXT:    s_add_i32 s10, s10, s15
-; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s10, 8
+; GFX803-NEXT:    s_add_i32 s12, s12, s17
+; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX803-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GFX803-NEXT:    s_add_u32 s0, s0, s15
+; GFX803-NEXT:    s_add_u32 s0, s0, s17
 ; GFX803-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; GFX803-NEXT:    v_or_b32_e32 v0, v0, v1
-; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s11
+; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s13
 ; GFX803-NEXT:    s_addc_u32 s1, s1, 0
-; GFX803-NEXT:    s_mov_b64 s[10:11], s[8:9]
+; GFX803-NEXT:    s_mov_b32 s13, s15
+; GFX803-NEXT:    s_mov_b32 s12, s14
 ; GFX803-NEXT:    v_mov_b32_e32 v3, 0
 ; GFX803-NEXT:    v_or_b32_e32 v31, v0, v2
-; GFX803-NEXT:    s_mov_b64 s[8:9], s[6:7]
+; GFX803-NEXT:    s_mov_b32 s14, s16
 ; GFX803-NEXT:    s_movk_i32 s32, 0x400
 ; GFX803-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:4
 ; GFX803-NEXT:    s_waitcnt vmcnt(0)
-; GFX803-NEXT:    s_getpc_b64 s[16:17]
-; GFX803-NEXT:    s_add_u32 s16, s16, ex at rel32@lo+4
-; GFX803-NEXT:    s_addc_u32 s17, s17, ex at rel32@hi+12
-; GFX803-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX803-NEXT:    s_getpc_b64 s[18:19]
+; GFX803-NEXT:    s_add_u32 s18, s18, ex at rel32@lo+4
+; GFX803-NEXT:    s_addc_u32 s19, s19, ex at rel32@hi+12
+; GFX803-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX803-NEXT:    s_endpgm
 ;
 ; GFX900-LABEL: test_kern_stack_and_call:
 ; GFX900:       ; %bb.0: ; %entry
-; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s10, s15
-; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
-; GFX900-NEXT:    s_add_u32 s0, s0, s15
+; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; GFX900-NEXT:    s_add_u32 s0, s0, s17
 ; GFX900-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX900-NEXT:    s_addc_u32 s1, s1, 0
-; GFX900-NEXT:    s_mov_b64 s[10:11], s[8:9]
+; GFX900-NEXT:    s_mov_b32 s13, s15
+; GFX900-NEXT:    s_mov_b32 s12, s14
 ; GFX900-NEXT:    v_mov_b32_e32 v3, 0
 ; GFX900-NEXT:    v_or3_b32 v31, v0, v1, v2
-; GFX900-NEXT:    s_mov_b64 s[8:9], s[6:7]
+; GFX900-NEXT:    s_mov_b32 s14, s16
 ; GFX900-NEXT:    s_movk_i32 s32, 0x400
 ; GFX900-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:4
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    s_getpc_b64 s[16:17]
-; GFX900-NEXT:    s_add_u32 s16, s16, ex at rel32@lo+4
-; GFX900-NEXT:    s_addc_u32 s17, s17, ex at rel32@hi+12
-; GFX900-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX900-NEXT:    s_getpc_b64 s[18:19]
+; GFX900-NEXT:    s_add_u32 s18, s18, ex at rel32@lo+4
+; GFX900-NEXT:    s_addc_u32 s19, s19, ex at rel32@hi+12
+; GFX900-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX900-NEXT:    s_endpgm
 ;
 ; GFX1010-LABEL: test_kern_stack_and_call:
 ; GFX1010:       ; %bb.0: ; %entry
-; GFX1010-NEXT:    s_add_u32 s10, s10, s15
+; GFX1010-NEXT:    s_add_u32 s12, s12, s17
 ; GFX1010-NEXT:    s_movk_i32 s32, 0x200
-; GFX1010-NEXT:    s_addc_u32 s11, s11, 0
-; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
-; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
+; GFX1010-NEXT:    s_addc_u32 s13, s13, 0
+; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
+; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
 ; GFX1010-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; GFX1010-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX1010-NEXT:    v_mov_b32_e32 v3, 0
-; GFX1010-NEXT:    s_add_u32 s0, s0, s15
+; GFX1010-NEXT:    s_add_u32 s0, s0, s17
 ; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
-; GFX1010-NEXT:    s_mov_b64 s[10:11], s[8:9]
+; GFX1010-NEXT:    s_mov_b32 s13, s15
 ; GFX1010-NEXT:    v_or3_b32 v31, v0, v1, v2
-; GFX1010-NEXT:    s_mov_b64 s[8:9], s[6:7]
+; GFX1010-NEXT:    s_mov_b32 s12, s14
+; GFX1010-NEXT:    s_mov_b32 s14, s16
 ; GFX1010-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:4
 ; GFX1010-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX1010-NEXT:    s_getpc_b64 s[16:17]
-; GFX1010-NEXT:    s_add_u32 s16, s16, ex at rel32@lo+4
-; GFX1010-NEXT:    s_addc_u32 s17, s17, ex at rel32@hi+12
-; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1010-NEXT:    s_getpc_b64 s[18:19]
+; GFX1010-NEXT:    s_add_u32 s18, s18, ex at rel32@lo+4
+; GFX1010-NEXT:    s_addc_u32 s19, s19, ex at rel32@hi+12
+; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1010-NEXT:    s_endpgm
 ;
 ; GFX1100-LABEL: test_kern_stack_and_call:
@@ -218,18 +225,19 @@ define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
 ; GFX1100-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1100-NEXT:    v_mov_b32_e32 v31, v0
 ; GFX1100-NEXT:    s_mov_b32 s12, s13
-; GFX1100-NEXT:    s_mov_b64 s[10:11], s[4:5]
+; GFX1100-NEXT:    s_mov_b64 s[10:11], s[6:7]
+; GFX1100-NEXT:    s_mov_b64 s[8:9], s[4:5]
 ; GFX1100-NEXT:    s_mov_b64 s[4:5], s[0:1]
-; GFX1100-NEXT:    s_mov_b64 s[8:9], s[2:3]
+; GFX1100-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1100-NEXT:    s_mov_b32 s13, s14
 ; GFX1100-NEXT:    s_mov_b32 s14, s15
 ; GFX1100-NEXT:    s_mov_b32 s32, 16
 ; GFX1100-NEXT:    scratch_store_b32 off, v1, off offset:4 dlc
 ; GFX1100-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX1100-NEXT:    s_getpc_b64 s[6:7]
-; GFX1100-NEXT:    s_add_u32 s6, s6, ex at rel32@lo+4
-; GFX1100-NEXT:    s_addc_u32 s7, s7, ex at rel32@hi+12
-; GFX1100-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GFX1100-NEXT:    s_getpc_b64 s[16:17]
+; GFX1100-NEXT:    s_add_u32 s16, s16, ex at rel32@lo+4
+; GFX1100-NEXT:    s_addc_u32 s17, s17, ex at rel32@hi+12
+; GFX1100-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX1100-NEXT:    s_endpgm
 
@@ -313,80 +321,84 @@ entry:
 define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
 ; GFX803-LABEL: test_force_fp_kern_call:
 ; GFX803:       ; %bb.0: ; %entry
-; GFX803-NEXT:    s_add_i32 s10, s10, s15
-; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s10, 8
+; GFX803-NEXT:    s_add_i32 s12, s12, s17
+; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX803-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GFX803-NEXT:    s_add_u32 s0, s0, s15
+; GFX803-NEXT:    s_add_u32 s0, s0, s17
 ; GFX803-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; GFX803-NEXT:    v_or_b32_e32 v0, v0, v1
-; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s11
+; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s13
 ; GFX803-NEXT:    s_addc_u32 s1, s1, 0
-; GFX803-NEXT:    s_mov_b64 s[10:11], s[8:9]
+; GFX803-NEXT:    s_mov_b32 s13, s15
+; GFX803-NEXT:    s_mov_b32 s12, s14
 ; GFX803-NEXT:    v_or_b32_e32 v31, v0, v2
-; GFX803-NEXT:    s_mov_b64 s[8:9], s[6:7]
+; GFX803-NEXT:    s_mov_b32 s14, s16
 ; GFX803-NEXT:    s_mov_b32 s32, 0
 ; GFX803-NEXT:    s_mov_b32 s33, 0
-; GFX803-NEXT:    s_getpc_b64 s[16:17]
-; GFX803-NEXT:    s_add_u32 s16, s16, ex at rel32@lo+4
-; GFX803-NEXT:    s_addc_u32 s17, s17, ex at rel32@hi+12
-; GFX803-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX803-NEXT:    s_getpc_b64 s[18:19]
+; GFX803-NEXT:    s_add_u32 s18, s18, ex at rel32@lo+4
+; GFX803-NEXT:    s_addc_u32 s19, s19, ex at rel32@hi+12
+; GFX803-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX803-NEXT:    s_endpgm
 ;
 ; GFX900-LABEL: test_force_fp_kern_call:
 ; GFX900:       ; %bb.0: ; %entry
-; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s10, s15
-; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
-; GFX900-NEXT:    s_add_u32 s0, s0, s15
+; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; GFX900-NEXT:    s_add_u32 s0, s0, s17
 ; GFX900-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX900-NEXT:    s_addc_u32 s1, s1, 0
-; GFX900-NEXT:    s_mov_b64 s[10:11], s[8:9]
+; GFX900-NEXT:    s_mov_b32 s13, s15
+; GFX900-NEXT:    s_mov_b32 s12, s14
 ; GFX900-NEXT:    v_or3_b32 v31, v0, v1, v2
-; GFX900-NEXT:    s_mov_b64 s[8:9], s[6:7]
+; GFX900-NEXT:    s_mov_b32 s14, s16
 ; GFX900-NEXT:    s_mov_b32 s32, 0
 ; GFX900-NEXT:    s_mov_b32 s33, 0
-; GFX900-NEXT:    s_getpc_b64 s[16:17]
-; GFX900-NEXT:    s_add_u32 s16, s16, ex at rel32@lo+4
-; GFX900-NEXT:    s_addc_u32 s17, s17, ex at rel32@hi+12
-; GFX900-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX900-NEXT:    s_getpc_b64 s[18:19]
+; GFX900-NEXT:    s_add_u32 s18, s18, ex at rel32@lo+4
+; GFX900-NEXT:    s_addc_u32 s19, s19, ex at rel32@hi+12
+; GFX900-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX900-NEXT:    s_endpgm
 ;
 ; GFX1010-LABEL: test_force_fp_kern_call:
 ; GFX1010:       ; %bb.0: ; %entry
-; GFX1010-NEXT:    s_add_u32 s10, s10, s15
+; GFX1010-NEXT:    s_add_u32 s12, s12, s17
 ; GFX1010-NEXT:    s_mov_b32 s32, 0
 ; GFX1010-NEXT:    s_mov_b32 s33, 0
-; GFX1010-NEXT:    s_addc_u32 s11, s11, 0
-; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
-; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
+; GFX1010-NEXT:    s_addc_u32 s13, s13, 0
+; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
+; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
 ; GFX1010-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; GFX1010-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GFX1010-NEXT:    s_add_u32 s0, s0, s15
+; GFX1010-NEXT:    s_add_u32 s0, s0, s17
 ; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
-; GFX1010-NEXT:    s_mov_b64 s[10:11], s[8:9]
-; GFX1010-NEXT:    s_mov_b64 s[8:9], s[6:7]
+; GFX1010-NEXT:    s_mov_b32 s13, s15
+; GFX1010-NEXT:    s_mov_b32 s12, s14
 ; GFX1010-NEXT:    v_or3_b32 v31, v0, v1, v2
-; GFX1010-NEXT:    s_getpc_b64 s[16:17]
-; GFX1010-NEXT:    s_add_u32 s16, s16, ex at rel32@lo+4
-; GFX1010-NEXT:    s_addc_u32 s17, s17, ex at rel32@hi+12
-; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1010-NEXT:    s_mov_b32 s14, s16
+; GFX1010-NEXT:    s_getpc_b64 s[18:19]
+; GFX1010-NEXT:    s_add_u32 s18, s18, ex at rel32@lo+4
+; GFX1010-NEXT:    s_addc_u32 s19, s19, ex at rel32@hi+12
+; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1010-NEXT:    s_endpgm
 ;
 ; GFX1100-LABEL: test_force_fp_kern_call:
 ; GFX1100:       ; %bb.0: ; %entry
 ; GFX1100-NEXT:    v_mov_b32_e32 v31, v0
 ; GFX1100-NEXT:    s_mov_b32 s12, s13
-; GFX1100-NEXT:    s_mov_b64 s[10:11], s[4:5]
+; GFX1100-NEXT:    s_mov_b64 s[10:11], s[6:7]
+; GFX1100-NEXT:    s_mov_b64 s[8:9], s[4:5]
 ; GFX1100-NEXT:    s_mov_b64 s[4:5], s[0:1]
-; GFX1100-NEXT:    s_mov_b64 s[8:9], s[2:3]
+; GFX1100-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1100-NEXT:    s_mov_b32 s13, s14
 ; GFX1100-NEXT:    s_mov_b32 s14, s15
 ; GFX1100-NEXT:    s_mov_b32 s32, 0
 ; GFX1100-NEXT:    s_mov_b32 s33, 0
-; GFX1100-NEXT:    s_getpc_b64 s[6:7]
-; GFX1100-NEXT:    s_add_u32 s6, s6, ex at rel32@lo+4
-; GFX1100-NEXT:    s_addc_u32 s7, s7, ex at rel32@hi+12
-; GFX1100-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GFX1100-NEXT:    s_getpc_b64 s[16:17]
+; GFX1100-NEXT:    s_add_u32 s16, s16, ex at rel32@lo+4
+; GFX1100-NEXT:    s_addc_u32 s17, s17, ex at rel32@hi+12
+; GFX1100-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1100-NEXT:    s_endpgm
 ; GFX1010-NEXT    s_add_u32 s12, s12, s17
 ; GFX1010-NEXT    s_mov_b32 s32, 0
@@ -415,72 +427,75 @@ entry:
 define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_addr #2 {
 ; GFX803-LABEL: test_force_fp_kern_stack_and_call:
 ; GFX803:       ; %bb.0: ; %entry
-; GFX803-NEXT:    s_add_i32 s10, s10, s15
-; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s10, 8
+; GFX803-NEXT:    s_add_i32 s12, s12, s17
+; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
 ; GFX803-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GFX803-NEXT:    s_add_u32 s0, s0, s15
+; GFX803-NEXT:    s_add_u32 s0, s0, s17
 ; GFX803-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; GFX803-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX803-NEXT:    s_mov_b32 s33, 0
-; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s11
+; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s13
 ; GFX803-NEXT:    s_addc_u32 s1, s1, 0
-; GFX803-NEXT:    s_mov_b64 s[10:11], s[8:9]
+; GFX803-NEXT:    s_mov_b32 s13, s15
+; GFX803-NEXT:    s_mov_b32 s12, s14
 ; GFX803-NEXT:    v_mov_b32_e32 v3, 0
 ; GFX803-NEXT:    v_or_b32_e32 v31, v0, v2
-; GFX803-NEXT:    s_mov_b64 s[8:9], s[6:7]
+; GFX803-NEXT:    s_mov_b32 s14, s16
 ; GFX803-NEXT:    s_movk_i32 s32, 0x400
 ; GFX803-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:4
 ; GFX803-NEXT:    s_waitcnt vmcnt(0)
-; GFX803-NEXT:    s_getpc_b64 s[16:17]
-; GFX803-NEXT:    s_add_u32 s16, s16, ex at rel32@lo+4
-; GFX803-NEXT:    s_addc_u32 s17, s17, ex at rel32@hi+12
-; GFX803-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX803-NEXT:    s_getpc_b64 s[18:19]
+; GFX803-NEXT:    s_add_u32 s18, s18, ex at rel32@lo+4
+; GFX803-NEXT:    s_addc_u32 s19, s19, ex at rel32@hi+12
+; GFX803-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX803-NEXT:    s_endpgm
 ;
 ; GFX900-LABEL: test_force_fp_kern_stack_and_call:
 ; GFX900:       ; %bb.0: ; %entry
-; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s10, s15
-; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
-; GFX900-NEXT:    s_add_u32 s0, s0, s15
+; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; GFX900-NEXT:    s_add_u32 s0, s0, s17
 ; GFX900-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX900-NEXT:    s_mov_b32 s33, 0
 ; GFX900-NEXT:    s_addc_u32 s1, s1, 0
-; GFX900-NEXT:    s_mov_b64 s[10:11], s[8:9]
+; GFX900-NEXT:    s_mov_b32 s13, s15
+; GFX900-NEXT:    s_mov_b32 s12, s14
 ; GFX900-NEXT:    v_mov_b32_e32 v3, 0
 ; GFX900-NEXT:    v_or3_b32 v31, v0, v1, v2
-; GFX900-NEXT:    s_mov_b64 s[8:9], s[6:7]
+; GFX900-NEXT:    s_mov_b32 s14, s16
 ; GFX900-NEXT:    s_movk_i32 s32, 0x400
 ; GFX900-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:4
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    s_getpc_b64 s[16:17]
-; GFX900-NEXT:    s_add_u32 s16, s16, ex at rel32@lo+4
-; GFX900-NEXT:    s_addc_u32 s17, s17, ex at rel32@hi+12
-; GFX900-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX900-NEXT:    s_getpc_b64 s[18:19]
+; GFX900-NEXT:    s_add_u32 s18, s18, ex at rel32@lo+4
+; GFX900-NEXT:    s_addc_u32 s19, s19, ex at rel32@hi+12
+; GFX900-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX900-NEXT:    s_endpgm
 ;
 ; GFX1010-LABEL: test_force_fp_kern_stack_and_call:
 ; GFX1010:       ; %bb.0: ; %entry
-; GFX1010-NEXT:    s_add_u32 s10, s10, s15
+; GFX1010-NEXT:    s_add_u32 s12, s12, s17
 ; GFX1010-NEXT:    s_movk_i32 s32, 0x200
 ; GFX1010-NEXT:    s_mov_b32 s33, 0
-; GFX1010-NEXT:    s_addc_u32 s11, s11, 0
-; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
-; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
+; GFX1010-NEXT:    s_addc_u32 s13, s13, 0
+; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
+; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
 ; GFX1010-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; GFX1010-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX1010-NEXT:    v_mov_b32_e32 v3, 0
-; GFX1010-NEXT:    s_add_u32 s0, s0, s15
+; GFX1010-NEXT:    s_add_u32 s0, s0, s17
 ; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
-; GFX1010-NEXT:    s_mov_b64 s[10:11], s[8:9]
+; GFX1010-NEXT:    s_mov_b32 s13, s15
 ; GFX1010-NEXT:    v_or3_b32 v31, v0, v1, v2
-; GFX1010-NEXT:    s_mov_b64 s[8:9], s[6:7]
+; GFX1010-NEXT:    s_mov_b32 s12, s14
+; GFX1010-NEXT:    s_mov_b32 s14, s16
 ; GFX1010-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:4
 ; GFX1010-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX1010-NEXT:    s_getpc_b64 s[16:17]
-; GFX1010-NEXT:    s_add_u32 s16, s16, ex at rel32@lo+4
-; GFX1010-NEXT:    s_addc_u32 s17, s17, ex at rel32@hi+12
-; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1010-NEXT:    s_getpc_b64 s[18:19]
+; GFX1010-NEXT:    s_add_u32 s18, s18, ex at rel32@lo+4
+; GFX1010-NEXT:    s_addc_u32 s19, s19, ex at rel32@hi+12
+; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1010-NEXT:    s_endpgm
 ;
 ; GFX1100-LABEL: test_force_fp_kern_stack_and_call:
@@ -489,18 +504,19 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
 ; GFX1100-NEXT:    v_mov_b32_e32 v31, v0
 ; GFX1100-NEXT:    s_mov_b32 s33, 0
 ; GFX1100-NEXT:    s_mov_b32 s12, s13
-; GFX1100-NEXT:    s_mov_b64 s[10:11], s[4:5]
+; GFX1100-NEXT:    s_mov_b64 s[10:11], s[6:7]
+; GFX1100-NEXT:    s_mov_b64 s[8:9], s[4:5]
 ; GFX1100-NEXT:    s_mov_b64 s[4:5], s[0:1]
-; GFX1100-NEXT:    s_mov_b64 s[8:9], s[2:3]
+; GFX1100-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1100-NEXT:    s_mov_b32 s13, s14
 ; GFX1100-NEXT:    s_mov_b32 s14, s15
 ; GFX1100-NEXT:    s_mov_b32 s32, 16
 ; GFX1100-NEXT:    scratch_store_b32 off, v1, s33 offset:4 dlc
 ; GFX1100-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX1100-NEXT:    s_getpc_b64 s[6:7]
-; GFX1100-NEXT:    s_add_u32 s6, s6, ex at rel32@lo+4
-; GFX1100-NEXT:    s_addc_u32 s7, s7, ex at rel32@hi+12
-; GFX1100-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GFX1100-NEXT:    s_getpc_b64 s[16:17]
+; GFX1100-NEXT:    s_add_u32 s16, s16, ex at rel32@lo+4
+; GFX1100-NEXT:    s_addc_u32 s17, s17, ex at rel32@hi+12
+; GFX1100-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX1100-NEXT:    s_endpgm
 entry:

diff  --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
index 3b856ac63f76a..5d985850446cc 100644
--- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
@@ -168,28 +168,30 @@ bb1:
 define amdgpu_kernel void @v3i16_registers(i1 %cond) #0 {
 ; GCN-LABEL: v3i16_registers:
 ; GCN:       ; %bb.0: ; %entry
-; GCN-NEXT:    s_add_u32 flat_scratch_lo, s10, s15
-; GCN-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
-; GCN-NEXT:    s_mov_b64 s[10:11], s[8:9]
-; GCN-NEXT:    s_load_dword s8, s[6:7], 0x0
-; GCN-NEXT:    s_add_u32 s0, s0, s15
+; GCN-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; GCN-NEXT:    s_load_dword s12, s[8:9], 0x0
+; GCN-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; GCN-NEXT:    s_add_u32 s0, s0, s17
 ; GCN-NEXT:    s_addc_u32 s1, s1, 0
 ; GCN-NEXT:    s_mov_b32 s32, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_bitcmp1_b32 s8, 0
-; GCN-NEXT:    s_cselect_b64 s[8:9], -1, 0
-; GCN-NEXT:    s_and_b64 vcc, exec, s[8:9]
+; GCN-NEXT:    s_bitcmp1_b32 s12, 0
+; GCN-NEXT:    s_cselect_b64 s[12:13], -1, 0
+; GCN-NEXT:    s_and_b64 vcc, exec, s[12:13]
 ; GCN-NEXT:    s_cbranch_vccnz .LBB4_2
 ; GCN-NEXT:  ; %bb.1: ; %if.else
-; GCN-NEXT:    s_add_u32 s8, s6, 8
+; GCN-NEXT:    s_add_u32 s8, s8, 8
 ; GCN-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; GCN-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GCN-NEXT:    s_addc_u32 s9, s7, 0
+; GCN-NEXT:    s_addc_u32 s9, s9, 0
 ; GCN-NEXT:    v_or3_b32 v31, v0, v1, v2
-; GCN-NEXT:    s_getpc_b64 s[6:7]
-; GCN-NEXT:    s_add_u32 s6, s6, func_v3i16 at rel32@lo+4
-; GCN-NEXT:    s_addc_u32 s7, s7, func_v3i16 at rel32@hi+12
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GCN-NEXT:    s_mov_b32 s12, s14
+; GCN-NEXT:    s_mov_b32 s13, s15
+; GCN-NEXT:    s_mov_b32 s14, s16
+; GCN-NEXT:    s_getpc_b64 s[18:19]
+; GCN-NEXT:    s_add_u32 s18, s18, func_v3i16 at rel32@lo+4
+; GCN-NEXT:    s_addc_u32 s19, s19, func_v3i16 at rel32@hi+12
+; GCN-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GCN-NEXT:    s_branch .LBB4_3
 ; GCN-NEXT:  .LBB4_2:
 ; GCN-NEXT:    s_mov_b32 s4, 0
@@ -219,28 +221,30 @@ if.end:                                           ; preds = %if.else, %if.then
 define amdgpu_kernel void @v3f16_registers(i1 %cond) #0 {
 ; GCN-LABEL: v3f16_registers:
 ; GCN:       ; %bb.0: ; %entry
-; GCN-NEXT:    s_add_u32 flat_scratch_lo, s10, s15
-; GCN-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
-; GCN-NEXT:    s_mov_b64 s[10:11], s[8:9]
-; GCN-NEXT:    s_load_dword s8, s[6:7], 0x0
-; GCN-NEXT:    s_add_u32 s0, s0, s15
+; GCN-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; GCN-NEXT:    s_load_dword s12, s[8:9], 0x0
+; GCN-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; GCN-NEXT:    s_add_u32 s0, s0, s17
 ; GCN-NEXT:    s_addc_u32 s1, s1, 0
 ; GCN-NEXT:    s_mov_b32 s32, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_bitcmp1_b32 s8, 0
-; GCN-NEXT:    s_cselect_b64 s[8:9], -1, 0
-; GCN-NEXT:    s_and_b64 vcc, exec, s[8:9]
+; GCN-NEXT:    s_bitcmp1_b32 s12, 0
+; GCN-NEXT:    s_cselect_b64 s[12:13], -1, 0
+; GCN-NEXT:    s_and_b64 vcc, exec, s[12:13]
 ; GCN-NEXT:    s_cbranch_vccnz .LBB5_2
 ; GCN-NEXT:  ; %bb.1: ; %if.else
-; GCN-NEXT:    s_add_u32 s8, s6, 8
+; GCN-NEXT:    s_add_u32 s8, s8, 8
 ; GCN-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; GCN-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GCN-NEXT:    s_addc_u32 s9, s7, 0
+; GCN-NEXT:    s_addc_u32 s9, s9, 0
 ; GCN-NEXT:    v_or3_b32 v31, v0, v1, v2
-; GCN-NEXT:    s_getpc_b64 s[6:7]
-; GCN-NEXT:    s_add_u32 s6, s6, func_v3f16 at rel32@lo+4
-; GCN-NEXT:    s_addc_u32 s7, s7, func_v3f16 at rel32@hi+12
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GCN-NEXT:    s_mov_b32 s12, s14
+; GCN-NEXT:    s_mov_b32 s13, s15
+; GCN-NEXT:    s_mov_b32 s14, s16
+; GCN-NEXT:    s_getpc_b64 s[18:19]
+; GCN-NEXT:    s_add_u32 s18, s18, func_v3f16 at rel32@lo+4
+; GCN-NEXT:    s_addc_u32 s19, s19, func_v3f16 at rel32@hi+12
+; GCN-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GCN-NEXT:    s_branch .LBB5_3
 ; GCN-NEXT:  .LBB5_2:
 ; GCN-NEXT:    s_mov_b32 s4, 0

diff  --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
index d847afd68f918..1ada8947b6f56 100644
--- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
@@ -992,7 +992,7 @@ define amdgpu_kernel void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 a
 ;
 ; GFX9-LABEL: load_i8_to_f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_ubyte v0, v0, s[2:3]
@@ -1073,7 +1073,7 @@ define amdgpu_kernel void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias
 ;
 ; GFX9-LABEL: load_v2i8_to_v2f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1162,7 +1162,7 @@ define amdgpu_kernel void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias
 ;
 ; GFX9-LABEL: load_v3i8_to_v3f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v3, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1254,7 +1254,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias
 ;
 ; GFX9-LABEL: load_v4i8_to_v4f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1378,7 +1378,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)
 ;
 ; GFX9-LABEL: load_v4i8_to_v4f32_unaligned:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1542,14 +1542,14 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* n
 ;
 ; GFX9-LABEL: load_v4i8_to_v4f32_2_uses:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x34
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x34
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v5, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v6, 9
-; GFX9-NEXT:    s_movk_i32 s4, 0x900
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v4, v0, s[0:1]
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-NEXT:    s_movk_i32 s4, 0x900
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 24, v4
 ; GFX9-NEXT:    v_cvt_f32_ubyte3_e32 v3, v4
@@ -1739,7 +1739,7 @@ define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias
 ;
 ; GFX9-LABEL: load_v7i8_to_v7f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v10, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1879,7 +1879,7 @@ define amdgpu_kernel void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias
 ;
 ; GFX9-LABEL: load_v8i8_to_v8f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v9, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1979,7 +1979,7 @@ define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias
 ;
 ; GFX9-LABEL: i8_zext_inreg_i32_to_f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2063,7 +2063,7 @@ define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias
 ;
 ; GFX9-LABEL: i8_zext_inreg_hi1_to_f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2143,7 +2143,7 @@ define amdgpu_kernel void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out,
 ;
 ; GFX9-LABEL: i8_zext_i32_to_f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_ubyte v0, v0, s[2:3]
@@ -2256,7 +2256,7 @@ define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* no
 ;
 ; GFX9-LABEL: v4i8_zext_v4i32_to_v4f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2355,7 +2355,7 @@ define amdgpu_kernel void @extract_byte0_to_f32(float addrspace(1)* noalias %out
 ;
 ; GFX9-LABEL: extract_byte0_to_f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2435,7 +2435,7 @@ define amdgpu_kernel void @extract_byte1_to_f32(float addrspace(1)* noalias %out
 ;
 ; GFX9-LABEL: extract_byte1_to_f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2516,7 +2516,7 @@ define amdgpu_kernel void @extract_byte2_to_f32(float addrspace(1)* noalias %out
 ;
 ; GFX9-LABEL: extract_byte2_to_f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2597,7 +2597,7 @@ define amdgpu_kernel void @extract_byte3_to_f32(float addrspace(1)* noalias %out
 ;
 ; GFX9-LABEL: extract_byte3_to_f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2685,7 +2685,7 @@ define amdgpu_kernel void @cvt_ubyte0_or_multiuse(i32 addrspace(1)* %in, float a
 ;
 ; GFX9-LABEL: cvt_ubyte0_or_multiuse:
 ; GFX9:       ; %bb.0: ; %bb
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)

diff  --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll
index 36b46140e41e5..cf80833981436 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll
@@ -20,16 +20,16 @@ define void @main(float %arg) {
   ; CHECK-NEXT:   successors: %bb.2(0x80000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
-  ; CHECK-NEXT:   [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = contract reassoc nofpexcept V_FMAC_F32_e64 0, [[S_MOV_B32_]], 0, [[S_MOV_B32_]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec
-  ; CHECK-NEXT:   [[V_FMAC_F32_e64_1:%[0-9]+]]:vgpr_32 = contract reassoc nofpexcept V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[V_FMAC_F32_e64_]], 0, 0, implicit $mode, implicit $exec
-  ; CHECK-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = contract reassoc nofpexcept V_ADD_F32_e64 0, [[V_FMAC_F32_e64_1]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   %20:vgpr_32 = contract reassoc nofpexcept V_FMAC_F32_e64 0, [[S_MOV_B32_]], 0, [[S_MOV_B32_]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   %1:vgpr_32 = contract reassoc nofpexcept V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY]], 0, %20, 0, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   %2:vgpr_32 = contract reassoc nofpexcept V_ADD_F32_e64 0, %1, 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec
   ; CHECK-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.bb11:
   ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, [[V_FMAC_F32_e64_1]], %bb.1
-  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, [[V_ADD_F32_e64_]], %bb.1
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, %1, %bb.1
+  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, %2, %bb.1
   ; CHECK-NEXT:   [[PHI2:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[S_MOV_B32_1]], %bb.0, [[S_MOV_B32_2]], %bb.1
   ; CHECK-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[PHI2]], implicit $exec
   ; CHECK-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 1

diff  --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
index ed9413b3f6c03..4a7a328d33c61 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
@@ -1337,31 +1337,33 @@ define amdgpu_kernel void @ds_read_call_read(i32 addrspace(1)* %out, i32 addrspa
 ; CI-NEXT:    s_getpc_b64 s[40:41]
 ; CI-NEXT:    s_mov_b32 s40, s0
 ; CI-NEXT:    s_load_dwordx4 s[40:43], s[40:41], 0x0
-; CI-NEXT:    s_mov_b64 s[10:11], s[4:5]
-; CI-NEXT:    s_load_dwordx2 s[36:37], s[2:3], 0x0
-; CI-NEXT:    s_load_dword s4, s[2:3], 0x2
-; CI-NEXT:    s_mov_b32 s14, s8
+; CI-NEXT:    s_mov_b32 s14, s10
+; CI-NEXT:    s_mov_b32 s12, s8
+; CI-NEXT:    s_mov_b32 s13, s9
 ; CI-NEXT:    v_lshlrev_b32_e32 v3, 2, v0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    s_add_u32 s40, s40, s9
+; CI-NEXT:    s_add_u32 s40, s40, s11
+; CI-NEXT:    s_mov_b64 s[10:11], s[6:7]
+; CI-NEXT:    s_load_dwordx2 s[36:37], s[4:5], 0x0
+; CI-NEXT:    s_load_dword s6, s[4:5], 0x2
 ; CI-NEXT:    s_addc_u32 s41, s41, 0
-; CI-NEXT:    s_add_u32 s8, s2, 12
-; CI-NEXT:    s_addc_u32 s9, s3, 0
-; CI-NEXT:    s_getpc_b64 s[2:3]
-; CI-NEXT:    s_add_u32 s2, s2, void_func_void at gotpcrel32@lo+4
-; CI-NEXT:    s_addc_u32 s3, s3, void_func_void at gotpcrel32@hi+12
-; CI-NEXT:    v_add_i32_e32 v40, vcc, s4, v3
+; CI-NEXT:    s_add_u32 s8, s4, 12
+; CI-NEXT:    s_addc_u32 s9, s5, 0
+; CI-NEXT:    s_getpc_b64 s[4:5]
+; CI-NEXT:    s_add_u32 s4, s4, void_func_void at gotpcrel32@lo+4
+; CI-NEXT:    s_addc_u32 s5, s5, void_func_void at gotpcrel32@hi+12
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    v_add_i32_e32 v40, vcc, s6, v3
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    s_load_dwordx2 s[16:17], s[2:3], 0x0
+; CI-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
 ; CI-NEXT:    ds_read_b32 v41, v40
 ; CI-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; CI-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; CI-NEXT:    v_or_b32_e32 v0, v0, v1
 ; CI-NEXT:    s_mov_b64 s[4:5], s[0:1]
+; CI-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; CI-NEXT:    s_mov_b64 s[0:1], s[40:41]
 ; CI-NEXT:    v_or_b32_e32 v31, v0, v2
-; CI-NEXT:    s_mov_b32 s12, s6
-; CI-NEXT:    s_mov_b32 s13, s7
 ; CI-NEXT:    s_mov_b64 s[2:3], s[42:43]
 ; CI-NEXT:    s_mov_b32 s32, 0
 ; CI-NEXT:    s_mov_b32 s39, 0xf000
@@ -1379,28 +1381,30 @@ define amdgpu_kernel void @ds_read_call_read(i32 addrspace(1)* %out, i32 addrspa
 ; GFX9-NEXT:    s_getpc_b64 s[36:37]
 ; GFX9-NEXT:    s_mov_b32 s36, s0
 ; GFX9-NEXT:    s_load_dwordx4 s[36:39], s[36:37], 0x0
-; GFX9-NEXT:    s_mov_b64 s[10:11], s[4:5]
-; GFX9-NEXT:    s_load_dword s4, s[2:3], 0x8
-; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[2:3], 0x0
-; GFX9-NEXT:    s_mov_b32 s14, s8
+; GFX9-NEXT:    s_mov_b32 s14, s10
+; GFX9-NEXT:    s_mov_b32 s12, s8
+; GFX9-NEXT:    s_mov_b32 s13, s9
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_add_u32 s36, s36, s9
+; GFX9-NEXT:    s_add_u32 s36, s36, s11
 ; GFX9-NEXT:    s_addc_u32 s37, s37, 0
-; GFX9-NEXT:    s_add_u32 s8, s2, 12
-; GFX9-NEXT:    s_addc_u32 s9, s3, 0
-; GFX9-NEXT:    s_getpc_b64 s[2:3]
-; GFX9-NEXT:    s_add_u32 s2, s2, void_func_void at gotpcrel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s3, s3, void_func_void at gotpcrel32@hi+12
-; GFX9-NEXT:    v_lshl_add_u32 v41, v0, 2, s4
-; GFX9-NEXT:    s_load_dwordx2 s[16:17], s[2:3], 0x0
+; GFX9-NEXT:    s_mov_b64 s[10:11], s[6:7]
+; GFX9-NEXT:    s_load_dword s6, s[4:5], 0x8
+; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[4:5], 0x0
+; GFX9-NEXT:    s_add_u32 s8, s4, 12
+; GFX9-NEXT:    s_addc_u32 s9, s5, 0
+; GFX9-NEXT:    s_getpc_b64 s[4:5]
+; GFX9-NEXT:    s_add_u32 s4, s4, void_func_void at gotpcrel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s5, s5, void_func_void at gotpcrel32@hi+12
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_lshl_add_u32 v41, v0, 2, s6
+; GFX9-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
 ; GFX9-NEXT:    ds_read_b32 v42, v41
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX9-NEXT:    s_mov_b64 s[4:5], s[0:1]
+; GFX9-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT:    s_mov_b32 s12, s6
-; GFX9-NEXT:    s_mov_b32 s13, s7
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v40, 0

diff  --git a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll
index e77268223dd60..94fe3f0e9fc11 100644
--- a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll
@@ -49,19 +49,19 @@ define weak_odr void @test(i32 %0) !dbg !34 {
 ; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
 ; CHECK-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
 ; CHECK-NEXT:    v_mov_b32_e32 v41, v31
-; CHECK-NEXT:    s_mov_b64 s[34:35], s[6:7]
 ; CHECK-NEXT:    s_mov_b32 s42, s15
 ; CHECK-NEXT:    s_mov_b32 s43, s14
 ; CHECK-NEXT:    s_mov_b32 s44, s13
 ; CHECK-NEXT:    s_mov_b32 s45, s12
-; CHECK-NEXT:    s_mov_b64 s[36:37], s[10:11]
-; CHECK-NEXT:    s_mov_b64 s[38:39], s[8:9]
+; CHECK-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; CHECK-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; CHECK-NEXT:    s_mov_b64 s[38:39], s[6:7]
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[46:47]
 ; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; CHECK-NEXT:    s_mov_b64 s[6:7], s[34:35]
-; CHECK-NEXT:    s_mov_b64 s[8:9], s[38:39]
-; CHECK-NEXT:    s_mov_b64 s[10:11], s[36:37]
+; CHECK-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; CHECK-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; CHECK-NEXT:    s_mov_b64 s[10:11], s[34:35]
 ; CHECK-NEXT:    s_mov_b32 s12, s45
 ; CHECK-NEXT:    s_mov_b32 s13, s44
 ; CHECK-NEXT:    s_mov_b32 s14, s43

diff  --git a/llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll b/llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll
index 544d10f059741..1cbde24c96a8b 100644
--- a/llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll
@@ -13,7 +13,7 @@
 
 ; NONE:   OS/ABI: SystemV       (0x0)
 ; HSA:    OS/ABI: AMDGPU_HSA    (0x40)
-; HSA:    ABIVersion: 3
+; HSA:    ABIVersion: 2
 ; PAL:    OS/ABI: AMDGPU_PAL    (0x41)
 ; PAL:    ABIVersion: 0
 ; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42)

diff  --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
index d4c7966b13c4d..91f3e3581fa2c 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
@@ -64,43 +64,45 @@ define amdgpu_kernel void @stack_object_in_kernel_no_calls() {
 define amdgpu_kernel void @kernel_calls_no_stack() {
 ; FLAT_SCR_OPT-LABEL: kernel_calls_no_stack:
 ; FLAT_SCR_OPT:       ; %bb.0:
-; FLAT_SCR_OPT-NEXT:    s_add_u32 s6, s6, s11
+; FLAT_SCR_OPT-NEXT:    s_add_u32 s8, s8, s13
 ; FLAT_SCR_OPT-NEXT:    s_mov_b32 s32, 0
-; FLAT_SCR_OPT-NEXT:    s_addc_u32 s7, s7, 0
-; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
-; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
-; FLAT_SCR_OPT-NEXT:    s_mov_b32 s14, s10
-; FLAT_SCR_OPT-NEXT:    s_mov_b64 s[10:11], s[4:5]
+; FLAT_SCR_OPT-NEXT:    s_addc_u32 s9, s9, 0
+; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
+; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
+; FLAT_SCR_OPT-NEXT:    s_mov_b64 s[8:9], s[4:5]
 ; FLAT_SCR_OPT-NEXT:    s_getpc_b64 s[4:5]
 ; FLAT_SCR_OPT-NEXT:    s_add_u32 s4, s4, extern_func at gotpcrel32@lo+4
 ; FLAT_SCR_OPT-NEXT:    s_addc_u32 s5, s5, extern_func at gotpcrel32@hi+12
 ; FLAT_SCR_OPT-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
-; FLAT_SCR_OPT-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x0
+; FLAT_SCR_OPT-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
 ; FLAT_SCR_OPT-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; FLAT_SCR_OPT-NEXT:    s_mov_b32 s13, s9
-; FLAT_SCR_OPT-NEXT:    s_mov_b32 s12, s8
-; FLAT_SCR_OPT-NEXT:    s_mov_b64 s[4:5], s[0:1]
-; FLAT_SCR_OPT-NEXT:    s_mov_b64 s[8:9], s[2:3]
+; FLAT_SCR_OPT-NEXT:    s_mov_b32 s14, s12
+; FLAT_SCR_OPT-NEXT:    s_mov_b32 s13, s11
+; FLAT_SCR_OPT-NEXT:    s_mov_b32 s12, s10
+; FLAT_SCR_OPT-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; FLAT_SCR_OPT-NEXT:    v_or3_b32 v31, v0, v1, v2
+; FLAT_SCR_OPT-NEXT:    s_mov_b64 s[4:5], s[0:1]
+; FLAT_SCR_OPT-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; FLAT_SCR_OPT-NEXT:    s_waitcnt lgkmcnt(0)
-; FLAT_SCR_OPT-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; FLAT_SCR_OPT-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; FLAT_SCR_OPT-NEXT:    s_endpgm
 ;
 ; FLAT_SCR_ARCH-LABEL: kernel_calls_no_stack:
 ; FLAT_SCR_ARCH:       ; %bb.0:
-; FLAT_SCR_ARCH-NEXT:    s_mov_b64 s[10:11], s[4:5]
+; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s13, s9
+; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s12, s8
+; FLAT_SCR_ARCH-NEXT:    s_mov_b64 s[8:9], s[4:5]
 ; FLAT_SCR_ARCH-NEXT:    s_getpc_b64 s[4:5]
 ; FLAT_SCR_ARCH-NEXT:    s_add_u32 s4, s4, extern_func at gotpcrel32@lo+4
 ; FLAT_SCR_ARCH-NEXT:    s_addc_u32 s5, s5, extern_func at gotpcrel32@hi+12
 ; FLAT_SCR_ARCH-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; FLAT_SCR_ARCH-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
 ; FLAT_SCR_ARCH-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s14, s8
+; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s14, s10
+; FLAT_SCR_ARCH-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; FLAT_SCR_ARCH-NEXT:    s_mov_b64 s[4:5], s[0:1]
-; FLAT_SCR_ARCH-NEXT:    s_mov_b64 s[8:9], s[2:3]
-; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s12, s6
+; FLAT_SCR_ARCH-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; FLAT_SCR_ARCH-NEXT:    v_or3_b32 v31, v0, v1, v2
-; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s13, s7
 ; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s32, 0
 ; FLAT_SCR_ARCH-NEXT:    s_waitcnt lgkmcnt(0)
 ; FLAT_SCR_ARCH-NEXT:    s_swappc_b64 s[30:31], s[16:17]

diff  --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
index fbe7a9f9b7b65..c4133d87795ec 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
@@ -1,4 +1,4 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn --amdhsa-code-object-version=4 -verify-machineinstrs < %s | FileCheck --check-prefixes=SI,FUNC %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=SI,FUNC %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefixes=VI,FUNC %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=r600 -mcpu=redwood < %s | FileCheck --check-prefixes=R600,FUNC %s
 

diff  --git a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll
index 633e7f5318eec..e48d4e5a0ee3d 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll
@@ -36,7 +36,6 @@
 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
 ; GCN-NEXT: .amdhsa_wavefront_size32
-; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
 ; GCN-NEXT: .amdhsa_enable_private_segment 0
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
@@ -65,7 +64,6 @@ define amdgpu_kernel void @minimal_kernel_inputs() {
 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
 ; GCN-NEXT: .amdhsa_wavefront_size32
-; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
 ; GCN-NEXT: .amdhsa_enable_private_segment 1
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
@@ -83,7 +81,7 @@ define amdgpu_kernel void @minimal_kernel_inputs_with_stack() {
 }
 
 ; GCN-LABEL: {{^}}queue_ptr:
-; ; GCN: global_load_u8 v{{[0-9]+}},
+; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[0:1]
 
 ; WORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s15
 ; NOWORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s2
@@ -93,12 +91,11 @@ define amdgpu_kernel void @minimal_kernel_inputs_with_stack() {
 ; WORKAROUND: .amdhsa_user_sgpr_count 15
 ; NOWORKAROUND: .amdhsa_user_sgpr_count 2
 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
-; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 0
-; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
+; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 1
+; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
 ; GCN-NEXT: .amdhsa_wavefront_size32
-; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
 ; GCN-NEXT: .amdhsa_enable_private_segment 0
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
@@ -120,16 +117,16 @@ define amdgpu_kernel void @queue_ptr() {
 ; WORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s14
 ; WORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s15
 
-; NOWORKAROUND: v_mov_b32_e32 [[V_X:v[0-9]+]], s6
-; NOWORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s7
-; NOWORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s8
+; NOWORKAROUND: v_mov_b32_e32 [[V_X:v[0-9]+]], s8
+; NOWORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s9
+; NOWORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s10
 
 ; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[0:1]
-; GCN: global_load_u8 v{{[0-9]+}},
 ; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[2:3]
+; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[4:5]
 
-; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_LO:[0-9]+]], s4
-; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_HI:[0-9]+]], s5
+; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_LO:[0-9]+]], s6
+; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_HI:[0-9]+]], s7
 
 ; GCN: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V_X]], off
 ; GCN: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V_Y]], off
@@ -138,14 +135,13 @@ define amdgpu_kernel void @queue_ptr() {
 
 ; GCN: .amdhsa_kernel all_inputs
 ; WORKAROUND: .amdhsa_user_sgpr_count 13
-; NOWORKAROUND: .amdhsa_user_sgpr_count 6
+; NOWORKAROUND: .amdhsa_user_sgpr_count 8
 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
-; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 1
 ; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 1
 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
 ; GCN-NEXT: .amdhsa_wavefront_size32
-; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
 ; GCN-NEXT: .amdhsa_enable_private_segment 1
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
@@ -153,7 +149,7 @@ define amdgpu_kernel void @queue_ptr() {
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_info 0
 ; GCN-NEXT: .amdhsa_system_vgpr_workitem_id 0
 ; WORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 13
-; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 6
+; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 8
 define amdgpu_kernel void @all_inputs() {
   %alloca = alloca i32, addrspace(5)
   store volatile i32 0, i32 addrspace(5)* %alloca

diff  --git a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
index e4e23f28407ed..df8e7a15a65f1 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
@@ -9,45 +9,48 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_mov_b32 s32, 0x180000
 ; CHECK-NEXT:    s_mov_b32 s33, 0
-; CHECK-NEXT:    s_add_u32 flat_scratch_lo, s10, s15
-; CHECK-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
-; CHECK-NEXT:    s_add_u32 s0, s0, s15
+; CHECK-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; CHECK-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; CHECK-NEXT:    s_add_u32 s0, s0, s17
 ; CHECK-NEXT:    s_addc_u32 s1, s1, 0
-; CHECK-NEXT:    s_mov_b64 s[10:11], s[8:9]
+; CHECK-NEXT:    v_writelane_b32 v40, s16, 0
+; CHECK-NEXT:    s_mov_b32 s13, s15
+; CHECK-NEXT:    s_mov_b32 s12, s14
+; CHECK-NEXT:    v_readlane_b32 s14, v40, 0
+; CHECK-NEXT:    s_mov_b64 s[16:17], s[8:9]
 ; CHECK-NEXT:    v_mov_b32_e32 v3, v2
 ; CHECK-NEXT:    v_mov_b32_e32 v2, v1
 ; CHECK-NEXT:    v_mov_b32_e32 v1, v0
-; CHECK-NEXT:    s_load_dword s8, s[6:7], 0x0
+; CHECK-NEXT:    s_load_dword s8, s[16:17], 0x0
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; CHECK-NEXT:    v_writelane_b32 v40, s8, 0
+; CHECK-NEXT:    v_writelane_b32 v40, s8, 1
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; def vgpr10
 ; CHECK-NEXT:    ;;#ASMEND
 ; CHECK-NEXT:    s_add_i32 s8, s33, 0x100100
 ; CHECK-NEXT:    buffer_store_dword v10, off, s[0:3], s8 ; 4-byte Folded Spill
-; CHECK-NEXT:    s_mov_b64 s[16:17], 8
-; CHECK-NEXT:    s_mov_b32 s8, s6
-; CHECK-NEXT:    s_mov_b32 s6, s7
-; CHECK-NEXT:    s_mov_b32 s9, s16
-; CHECK-NEXT:    s_mov_b32 s7, s17
-; CHECK-NEXT:    s_add_u32 s8, s8, s9
-; CHECK-NEXT:    s_addc_u32 s6, s6, s7
+; CHECK-NEXT:    s_mov_b64 s[18:19], 8
+; CHECK-NEXT:    s_mov_b32 s8, s16
+; CHECK-NEXT:    s_mov_b32 s9, s17
+; CHECK-NEXT:    s_mov_b32 s16, s18
+; CHECK-NEXT:    s_mov_b32 s15, s19
+; CHECK-NEXT:    s_add_u32 s8, s8, s16
+; CHECK-NEXT:    s_addc_u32 s15, s9, s15
 ; CHECK-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9
-; CHECK-NEXT:    s_mov_b32 s9, s6
+; CHECK-NEXT:    s_mov_b32 s9, s15
 ; CHECK-NEXT:    v_mov_b32_e32 v0, 0x2000
-; CHECK-NEXT:    ; implicit-def: $sgpr6
-; CHECK-NEXT:    s_getpc_b64 s[6:7]
-; CHECK-NEXT:    s_add_u32 s6, s6, device_func at gotpcrel32@lo+4
-; CHECK-NEXT:    s_addc_u32 s7, s7, device_func at gotpcrel32@hi+12
-; CHECK-NEXT:    s_load_dwordx2 s[16:17], s[6:7], 0x0
+; CHECK-NEXT:    ; implicit-def: $sgpr15
+; CHECK-NEXT:    s_getpc_b64 s[16:17]
+; CHECK-NEXT:    s_add_u32 s16, s16, device_func at gotpcrel32@lo+4
+; CHECK-NEXT:    s_addc_u32 s17, s17, device_func at gotpcrel32@hi+12
+; CHECK-NEXT:    s_load_dwordx2 s[16:17], s[16:17], 0x0
 ; CHECK-NEXT:    s_mov_b64 s[22:23], s[2:3]
 ; CHECK-NEXT:    s_mov_b64 s[20:21], s[0:1]
-; CHECK-NEXT:    s_mov_b32 s6, 20
-; CHECK-NEXT:    v_lshlrev_b32_e64 v3, s6, v3
-; CHECK-NEXT:    s_mov_b32 s6, 10
-; CHECK-NEXT:    v_lshlrev_b32_e64 v2, s6, v2
+; CHECK-NEXT:    s_mov_b32 s15, 20
+; CHECK-NEXT:    v_lshlrev_b32_e64 v3, s15, v3
+; CHECK-NEXT:    s_mov_b32 s15, 10
+; CHECK-NEXT:    v_lshlrev_b32_e64 v2, s15, v2
 ; CHECK-NEXT:    v_or3_b32 v31, v1, v2, v3
-; CHECK-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; CHECK-NEXT:    ; implicit-def: $sgpr15
 ; CHECK-NEXT:    s_mov_b64 s[0:1], s[20:21]
 ; CHECK-NEXT:    s_mov_b64 s[2:3], s[22:23]
@@ -55,7 +58,7 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; CHECK-NEXT:    s_add_i32 s4, s33, 0x100100
 ; CHECK-NEXT:    buffer_load_dword v10, off, s[0:3], s4 ; 4-byte Folded Reload
-; CHECK-NEXT:    v_readlane_b32 s4, v40, 0
+; CHECK-NEXT:    v_readlane_b32 s4, v40, 1
 ; CHECK-NEXT:    s_mov_b32 s5, 0
 ; CHECK-NEXT:    s_cmp_eq_u32 s4, s5
 ; CHECK-NEXT:    v_mov_b32_e32 v0, 0x4000

diff  --git a/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll b/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll
index 7e94b28c5acfc..66e2bfaeeb444 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll
@@ -59,20 +59,19 @@ define amdgpu_kernel void @module_0_kernel_normal_extern_normal(i32 %idx) #1 {
 define amdgpu_kernel void @module_1_kernel_normal_extern_normal(i32 %idx) {
 ; CHECK-LABEL: module_1_kernel_normal_extern_normal:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_add_u32 s6, s6, s9
+; CHECK-NEXT:    s_add_u32 s8, s8, s11
 ; CHECK-NEXT:    s_mov_b32 s32, 0
-; CHECK-NEXT:    s_addc_u32 s7, s7, 0
-; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
-; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
-; CHECK-NEXT:    s_add_u32 s0, s0, s9
+; CHECK-NEXT:    s_addc_u32 s9, s9, 0
+; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
+; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
+; CHECK-NEXT:    s_add_u32 s0, s0, s11
 ; CHECK-NEXT:    s_addc_u32 s1, s1, 0
-; CHECK-NEXT:    s_add_u32 s8, s4, 8
-; CHECK-NEXT:    s_addc_u32 s9, s5, 0
-; CHECK-NEXT:    s_getpc_b64 s[6:7]
-; CHECK-NEXT:    s_add_u32 s6, s6, use_module at gotpcrel32@lo+4
-; CHECK-NEXT:    s_addc_u32 s7, s7, use_module at gotpcrel32@hi+12
-; CHECK-NEXT:    s_load_dwordx2 s[10:11], s[6:7], 0x0
-; CHECK-NEXT:    s_load_dword s12, s[4:5], 0x0
+; CHECK-NEXT:    s_getpc_b64 s[8:9]
+; CHECK-NEXT:    s_add_u32 s8, s8, use_module at gotpcrel32@lo+4
+; CHECK-NEXT:    s_addc_u32 s9, s9, use_module at gotpcrel32@hi+12
+; CHECK-NEXT:    s_load_dwordx2 s[10:11], s[8:9], 0x0
+; CHECK-NEXT:    s_load_dword s12, s[6:7], 0x0
+; CHECK-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[10:11]
 ; CHECK-NEXT:    s_lshl_b32 s4, s12, 2
@@ -118,20 +117,19 @@ define amdgpu_kernel void @module_0_kernel_overalign_extern_normal(i32 %idx) #1
 define amdgpu_kernel void @module_1_kernel_overalign_extern_normal(i32 %idx) {
 ; CHECK-LABEL: module_1_kernel_overalign_extern_normal:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_add_u32 s6, s6, s9
+; CHECK-NEXT:    s_add_u32 s8, s8, s11
 ; CHECK-NEXT:    s_mov_b32 s32, 0
-; CHECK-NEXT:    s_addc_u32 s7, s7, 0
-; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
-; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
-; CHECK-NEXT:    s_add_u32 s0, s0, s9
+; CHECK-NEXT:    s_addc_u32 s9, s9, 0
+; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
+; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
+; CHECK-NEXT:    s_add_u32 s0, s0, s11
 ; CHECK-NEXT:    s_addc_u32 s1, s1, 0
-; CHECK-NEXT:    s_add_u32 s8, s4, 8
-; CHECK-NEXT:    s_addc_u32 s9, s5, 0
-; CHECK-NEXT:    s_getpc_b64 s[6:7]
-; CHECK-NEXT:    s_add_u32 s6, s6, use_module at gotpcrel32@lo+4
-; CHECK-NEXT:    s_addc_u32 s7, s7, use_module at gotpcrel32@hi+12
-; CHECK-NEXT:    s_load_dwordx2 s[10:11], s[6:7], 0x0
-; CHECK-NEXT:    s_load_dword s12, s[4:5], 0x0
+; CHECK-NEXT:    s_getpc_b64 s[8:9]
+; CHECK-NEXT:    s_add_u32 s8, s8, use_module at gotpcrel32@lo+4
+; CHECK-NEXT:    s_addc_u32 s9, s9, use_module at gotpcrel32@hi+12
+; CHECK-NEXT:    s_load_dwordx2 s[10:11], s[8:9], 0x0
+; CHECK-NEXT:    s_load_dword s12, s[6:7], 0x0
+; CHECK-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[10:11]
 ; CHECK-NEXT:    s_lshl_b32 s4, s12, 2
@@ -177,20 +175,19 @@ define amdgpu_kernel void @module_0_kernel_normal_extern_overalign(i32 %idx) #1
 define amdgpu_kernel void @module_1_kernel_normal_extern_overalign(i32 %idx) {
 ; CHECK-LABEL: module_1_kernel_normal_extern_overalign:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_add_u32 s6, s6, s9
+; CHECK-NEXT:    s_add_u32 s8, s8, s11
 ; CHECK-NEXT:    s_mov_b32 s32, 0
-; CHECK-NEXT:    s_addc_u32 s7, s7, 0
-; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
-; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
-; CHECK-NEXT:    s_add_u32 s0, s0, s9
+; CHECK-NEXT:    s_addc_u32 s9, s9, 0
+; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
+; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
+; CHECK-NEXT:    s_add_u32 s0, s0, s11
 ; CHECK-NEXT:    s_addc_u32 s1, s1, 0
-; CHECK-NEXT:    s_add_u32 s8, s4, 8
-; CHECK-NEXT:    s_addc_u32 s9, s5, 0
-; CHECK-NEXT:    s_getpc_b64 s[6:7]
-; CHECK-NEXT:    s_add_u32 s6, s6, use_module at gotpcrel32@lo+4
-; CHECK-NEXT:    s_addc_u32 s7, s7, use_module at gotpcrel32@hi+12
-; CHECK-NEXT:    s_load_dwordx2 s[10:11], s[6:7], 0x0
-; CHECK-NEXT:    s_load_dword s12, s[4:5], 0x0
+; CHECK-NEXT:    s_getpc_b64 s[8:9]
+; CHECK-NEXT:    s_add_u32 s8, s8, use_module at gotpcrel32@lo+4
+; CHECK-NEXT:    s_addc_u32 s9, s9, use_module at gotpcrel32@hi+12
+; CHECK-NEXT:    s_load_dwordx2 s[10:11], s[8:9], 0x0
+; CHECK-NEXT:    s_load_dword s12, s[6:7], 0x0
+; CHECK-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[10:11]
 ; CHECK-NEXT:    s_lshl_b32 s4, s12, 2
@@ -236,20 +233,19 @@ define amdgpu_kernel void @module_0_kernel_overalign_extern_overalign(i32 %idx)
 define amdgpu_kernel void @module_1_kernel_overalign_extern_overalign(i32 %idx) {
 ; CHECK-LABEL: module_1_kernel_overalign_extern_overalign:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_add_u32 s6, s6, s9
+; CHECK-NEXT:    s_add_u32 s8, s8, s11
 ; CHECK-NEXT:    s_mov_b32 s32, 0
-; CHECK-NEXT:    s_addc_u32 s7, s7, 0
-; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
-; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
-; CHECK-NEXT:    s_add_u32 s0, s0, s9
+; CHECK-NEXT:    s_addc_u32 s9, s9, 0
+; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
+; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
+; CHECK-NEXT:    s_add_u32 s0, s0, s11
 ; CHECK-NEXT:    s_addc_u32 s1, s1, 0
-; CHECK-NEXT:    s_add_u32 s8, s4, 8
-; CHECK-NEXT:    s_addc_u32 s9, s5, 0
-; CHECK-NEXT:    s_getpc_b64 s[6:7]
-; CHECK-NEXT:    s_add_u32 s6, s6, use_module at gotpcrel32@lo+4
-; CHECK-NEXT:    s_addc_u32 s7, s7, use_module at gotpcrel32@hi+12
-; CHECK-NEXT:    s_load_dwordx2 s[10:11], s[6:7], 0x0
-; CHECK-NEXT:    s_load_dword s12, s[4:5], 0x0
+; CHECK-NEXT:    s_getpc_b64 s[8:9]
+; CHECK-NEXT:    s_add_u32 s8, s8, use_module at gotpcrel32@lo+4
+; CHECK-NEXT:    s_addc_u32 s9, s9, use_module at gotpcrel32@hi+12
+; CHECK-NEXT:    s_load_dwordx2 s[10:11], s[8:9], 0x0
+; CHECK-NEXT:    s_load_dword s12, s[6:7], 0x0
+; CHECK-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[10:11]
 ; CHECK-NEXT:    s_lshl_b32 s4, s12, 2

diff  --git a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
index 6ba0acf4337d2..540422ba45ab9 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
@@ -14,11 +14,10 @@ define void @func_use_lds_global() {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX8-NEXT:    s_mov_b32 m0, -1
-; GFX8-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX8-NEXT:    ds_write_b32 v0, v0
-; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_mov_b64 s[0:1], s[6:7]
 ; GFX8-NEXT:    s_trap 2
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: func_use_lds_global:
@@ -38,9 +37,7 @@ define void @func_use_lds_global_constexpr_cast() {
 ; GFX8-LABEL: func_use_lds_global_constexpr_cast:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b64 s[4:5], 0
-; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_mov_b64 s[0:1], s[6:7]
 ; GFX8-NEXT:    s_trap 2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
index 68eefd726df29..7760c41ce8f41 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs --amdhsa-code-object-version=4 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
 
 ; GCN-LABEL: {{^}}is_private_vgpr:

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
index ce757599d02c3..69e49064c1c49 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs --amdhsa-code-object-version=4 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
 
 ; GCN-LABEL: {{^}}is_local_vgpr:

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
index 1e3e8fdfb546c..53020b0080b2c 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
@@ -17,7 +17,7 @@ define amdgpu_kernel void @kern_noargs() {
 
 define amdgpu_kernel void @kern_i8(i8 %arg) #0 {
 ; HSA-LABEL: @kern_i8(
-; HSA-NEXT:    [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -26,7 +26,7 @@ define amdgpu_kernel void @kern_i8(i8 %arg) #0 {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_i8(
-; MESA-NEXT:    [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -40,7 +40,7 @@ define amdgpu_kernel void @kern_i8(i8 %arg) #0 {
 
 define amdgpu_kernel void @kern_i16(i16 %arg) #0 {
 ; HSA-LABEL: @kern_i16(
-; HSA-NEXT:    [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I16_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -49,7 +49,7 @@ define amdgpu_kernel void @kern_i16(i16 %arg) #0 {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_i16(
-; MESA-NEXT:    [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I16_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -63,7 +63,7 @@ define amdgpu_kernel void @kern_i16(i16 %arg) #0 {
 
 define amdgpu_kernel void @kern_f16(half %arg) #0 {
 ; HSA-LABEL: @kern_f16(
-; HSA-NEXT:    [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F16_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -73,7 +73,7 @@ define amdgpu_kernel void @kern_f16(half %arg) #0 {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_f16(
-; MESA-NEXT:    [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F16_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -88,7 +88,7 @@ define amdgpu_kernel void @kern_f16(half %arg) #0 {
 
 define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 {
 ; HSA-LABEL: @kern_zeroext_i8(
-; HSA-NEXT:    [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -97,7 +97,7 @@ define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_zeroext_i8(
-; MESA-NEXT:    [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -111,7 +111,7 @@ define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 {
 
 define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 {
 ; HSA-LABEL: @kern_zeroext_i16(
-; HSA-NEXT:    [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -120,7 +120,7 @@ define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_zeroext_i16(
-; MESA-NEXT:    [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -134,7 +134,7 @@ define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 {
 
 define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 {
 ; HSA-LABEL: @kern_signext_i8(
-; HSA-NEXT:    [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -143,7 +143,7 @@ define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_signext_i8(
-; MESA-NEXT:    [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -157,7 +157,7 @@ define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 {
 
 define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 {
 ; HSA-LABEL: @kern_signext_i16(
-; HSA-NEXT:    [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -166,7 +166,7 @@ define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_signext_i16(
-; MESA-NEXT:    [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -180,7 +180,7 @@ define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 {
 
 define amdgpu_kernel void @kern_i8_i8(i8 %arg0, i8 %arg1) {
 ; HSA-LABEL: @kern_i8_i8(
-; HSA-NEXT:    [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -195,7 +195,7 @@ define amdgpu_kernel void @kern_i8_i8(i8 %arg0, i8 %arg1) {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_i8_i8(
-; MESA-NEXT:    [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -216,7 +216,7 @@ define amdgpu_kernel void @kern_i8_i8(i8 %arg0, i8 %arg1) {
 
 define amdgpu_kernel void @kern_v3i8(<3 x i8> %arg) {
 ; HSA-LABEL: @kern_v3i8(
-; HSA-NEXT:    [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I8_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -226,7 +226,7 @@ define amdgpu_kernel void @kern_v3i8(<3 x i8> %arg) {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_v3i8(
-; MESA-NEXT:    [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I8_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -241,7 +241,7 @@ define amdgpu_kernel void @kern_v3i8(<3 x i8> %arg) {
 
 define amdgpu_kernel void @kern_i24(i24 %arg0) {
 ; HSA-LABEL: @kern_i24(
-; HSA-NEXT:    [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I24_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -250,7 +250,7 @@ define amdgpu_kernel void @kern_i24(i24 %arg0) {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_i24(
-; MESA-NEXT:    [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I24_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -264,7 +264,7 @@ define amdgpu_kernel void @kern_i24(i24 %arg0) {
 
 define amdgpu_kernel void @kern_i32(i32 %arg0) {
 ; HSA-LABEL: @kern_i32(
-; HSA-NEXT:    [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -272,7 +272,7 @@ define amdgpu_kernel void @kern_i32(i32 %arg0) {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_i32(
-; MESA-NEXT:    [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -285,7 +285,7 @@ define amdgpu_kernel void @kern_i32(i32 %arg0) {
 
 define amdgpu_kernel void @kern_f32(float %arg0) {
 ; HSA-LABEL: @kern_f32(
-; HSA-NEXT:    [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F32_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to float addrspace(4)*
 ; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load float, float addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -293,7 +293,7 @@ define amdgpu_kernel void @kern_f32(float %arg0) {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_f32(
-; MESA-NEXT:    [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F32_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to float addrspace(4)*
 ; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load float, float addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -306,7 +306,7 @@ define amdgpu_kernel void @kern_f32(float %arg0) {
 
 define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) {
 ; HSA-LABEL: @kern_v3i32(
-; HSA-NEXT:    [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I32_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to <4 x i32> addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -315,7 +315,7 @@ define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_v3i32(
-; MESA-NEXT:    [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I32_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to <4 x i32> addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -329,7 +329,7 @@ define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) {
 
 define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 {
 ; HSA-LABEL: @kern_v8i32(
-; HSA-NEXT:    [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(288) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(88) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I32_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i32> addrspace(4)*
 ; HSA-NEXT:    [[ARG_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -337,7 +337,7 @@ define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_v8i32(
-; MESA-NEXT:    [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(288) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(88) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I32_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i32> addrspace(4)*
 ; MESA-NEXT:    [[ARG_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -350,7 +350,7 @@ define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 {
 
 define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 {
 ; HSA-LABEL: @kern_v8i64(
-; HSA-NEXT:    [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(320) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(120) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I64_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i64> addrspace(4)*
 ; HSA-NEXT:    [[ARG_LOAD:%.*]] = load <8 x i64>, <8 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -358,7 +358,7 @@ define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_v8i64(
-; MESA-NEXT:    [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(320) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(120) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I64_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i64> addrspace(4)*
 ; MESA-NEXT:    [[ARG_LOAD:%.*]] = load <8 x i64>, <8 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -371,7 +371,7 @@ define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 {
 
 define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 {
 ; HSA-LABEL: @kern_v16i64(
-; HSA-NEXT:    [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(384) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(184) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V16I64_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <16 x i64> addrspace(4)*
 ; HSA-NEXT:    [[ARG_LOAD:%.*]] = load <16 x i64>, <16 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -379,7 +379,7 @@ define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_v16i64(
-; MESA-NEXT:    [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(384) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(184) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V16I64_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <16 x i64> addrspace(4)*
 ; MESA-NEXT:    [[ARG_LOAD:%.*]] = load <16 x i64>, <16 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -392,7 +392,7 @@ define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 {
 
 define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) {
 ; HSA-LABEL: @kern_i32_v3i32(
-; HSA-NEXT:    [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(88) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -405,7 +405,7 @@ define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_i32_v3i32(
-; MESA-NEXT:    [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(88) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -427,7 +427,7 @@ define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) {
 
 define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) {
 ; HSA-LABEL: @kern_struct_a(
-; HSA-NEXT:    [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)*
 ; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -435,7 +435,7 @@ define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_struct_a(
-; MESA-NEXT:    [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)*
 ; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -448,7 +448,7 @@ define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) {
 
 define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 {
 ; HSA-LABEL: @kern_struct_b_packed(
-; HSA-NEXT:    [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(88) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)*
 ; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -456,7 +456,7 @@ define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_struct_b_packed(
-; MESA-NEXT:    [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(88) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)*
 ; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -511,7 +511,7 @@ define amdgpu_kernel void @kernel_implicitarg_no_struct_align(<16 x i32>, i32 %a
 
 define amdgpu_kernel void @kern_lds_ptr(i32 addrspace(3)* %lds) #0 {
 ; HSA-LABEL: @kern_lds_ptr(
-; HSA-NEXT:    [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)*
 ; HSA-NEXT:    [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -519,7 +519,7 @@ define amdgpu_kernel void @kern_lds_ptr(i32 addrspace(3)* %lds) #0 {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_lds_ptr(
-; MESA-NEXT:    [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)*
 ; MESA-NEXT:    [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -532,12 +532,12 @@ define amdgpu_kernel void @kern_lds_ptr(i32 addrspace(3)* %lds) #0 {
 
 define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 {
 ; HSA-LABEL: @kern_lds_ptr_si(
-; HSA-NEXT:    [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_lds_ptr_si(
-; MESA-NEXT:    [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4
 ; MESA-NEXT:    ret void
 ;
@@ -547,7 +547,7 @@ define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 {
 
 define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 {
 ; HSA-LABEL: @kern_realign_i8_i8(
-; HSA-NEXT:    [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -562,7 +562,7 @@ define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_realign_i8_i8(
-; MESA-NEXT:    [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -583,7 +583,7 @@ define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 {
 
 define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) #0 {
 ; HSA-LABEL: @kern_realign_i8_i8_i8(
-; HSA-NEXT:    [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -604,7 +604,7 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) #
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_realign_i8_i8_i8(
-; MESA-NEXT:    [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -632,7 +632,7 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) #
 
 define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) #0 {
 ; HSA-LABEL: @kern_realign_i8_i8_i8_i8(
-; HSA-NEXT:    [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -659,7 +659,7 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_realign_i8_i8_i8_i8(
-; MESA-NEXT:    [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -694,7 +694,7 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2
 
 define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 {
 ; HSA-LABEL: @kern_realign_i8_v3i8(
-; HSA-NEXT:    [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -709,7 +709,7 @@ define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_realign_i8_v3i8(
-; MESA-NEXT:    [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -730,7 +730,7 @@ define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 {
 
 define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 {
 ; HSA-LABEL: @kern_realign_i8_i16(
-; HSA-NEXT:    [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -745,7 +745,7 @@ define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_realign_i8_i16(
-; MESA-NEXT:    [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -766,7 +766,7 @@ define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 {
 
 define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 {
 ; HSA-LABEL: @kern_realign_i1_i1(
-; HSA-NEXT:    [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -781,7 +781,7 @@ define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_realign_i1_i1(
-; MESA-NEXT:    [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -802,7 +802,7 @@ define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 {
 
 define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) #0 {
 ; HSA-LABEL: @kern_realign_i1_i1_i1(
-; HSA-NEXT:    [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -823,7 +823,7 @@ define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) #
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_realign_i1_i1_i1(
-; MESA-NEXT:    [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -851,7 +851,7 @@ define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) #
 
 define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2, i1 %arg3) #0 {
 ; HSA-LABEL: @kern_realign_i1_i1_i1_i1(
-; HSA-NEXT:    [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -878,7 +878,7 @@ define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_realign_i1_i1_i1_i1(
-; MESA-NEXT:    [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -913,7 +913,7 @@ define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2
 
 define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 {
 ; HSA-LABEL: @kern_realign_i1_v3i1(
-; HSA-NEXT:    [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -929,7 +929,7 @@ define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_realign_i1_v3i1(
-; MESA-NEXT:    [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -951,7 +951,7 @@ define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 {
 
 define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 {
 ; HSA-LABEL: @kern_realign_i1_i16(
-; HSA-NEXT:    [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -966,7 +966,7 @@ define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_realign_i1_i16(
-; MESA-NEXT:    [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -987,7 +987,7 @@ define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 {
 
 define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 %arg6, i8 %arg7) #0 {
 ; HSA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(
-; HSA-NEXT:    [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -1032,7 +1032,7 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %ar
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(
-; MESA-NEXT:    [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -1088,7 +1088,7 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %ar
 
 define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 {
 ; HSA-LABEL: @kern_realign_f16_f16(
-; HSA-NEXT:    [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
@@ -1105,7 +1105,7 @@ define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_realign_f16_f16(
-; MESA-NEXT:    [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
@@ -1128,7 +1128,7 @@ define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 {
 
 define amdgpu_kernel void @kern_global_ptr(i8 addrspace(1)* %ptr) #0 {
 ; HSA-LABEL: @kern_global_ptr(
-; HSA-NEXT:    [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
 ; HSA-NEXT:    [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1136,7 +1136,7 @@ define amdgpu_kernel void @kern_global_ptr(i8 addrspace(1)* %ptr) #0 {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_global_ptr(
-; MESA-NEXT:    [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
 ; MESA-NEXT:    [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1149,7 +1149,7 @@ define amdgpu_kernel void @kern_global_ptr(i8 addrspace(1)* %ptr) #0 {
 
 define amdgpu_kernel void @kern_global_ptr_dereferencable(i8 addrspace(1)* dereferenceable(42) %ptr) #0 {
 ; HSA-LABEL: @kern_global_ptr_dereferencable(
-; HSA-NEXT:    [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
 ; HSA-NEXT:    [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !dereferenceable !1
@@ -1157,7 +1157,7 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable(i8 addrspace(1)* deref
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_global_ptr_dereferencable(
-; MESA-NEXT:    [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
 ; MESA-NEXT:    [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable !1
@@ -1170,7 +1170,7 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable(i8 addrspace(1)* deref
 
 define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(i8 addrspace(1)* dereferenceable_or_null(128) %ptr) #0 {
 ; HSA-LABEL: @kern_global_ptr_dereferencable_or_null(
-; HSA-NEXT:    [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
 ; HSA-NEXT:    [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !dereferenceable_or_null !2
@@ -1178,7 +1178,7 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(i8 addrspace(1
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_global_ptr_dereferencable_or_null(
-; MESA-NEXT:    [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
 ; MESA-NEXT:    [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable_or_null !2
@@ -1191,7 +1191,7 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(i8 addrspace(1
 
 define amdgpu_kernel void @kern_nonnull_global_ptr(i8 addrspace(1)* nonnull %ptr) #0 {
 ; HSA-LABEL: @kern_nonnull_global_ptr(
-; HSA-NEXT:    [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
 ; HSA-NEXT:    [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !nonnull !0
@@ -1199,7 +1199,7 @@ define amdgpu_kernel void @kern_nonnull_global_ptr(i8 addrspace(1)* nonnull %ptr
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_nonnull_global_ptr(
-; MESA-NEXT:    [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
 ; MESA-NEXT:    [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !nonnull !0
@@ -1212,7 +1212,7 @@ define amdgpu_kernel void @kern_nonnull_global_ptr(i8 addrspace(1)* nonnull %ptr
 
 define amdgpu_kernel void @kern_align32_global_ptr(i8 addrspace(1)* align 1024 %ptr) #0 {
 ; HSA-LABEL: @kern_align32_global_ptr(
-; HSA-NEXT:    [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
 ; HSA-NEXT:    [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !align !3
@@ -1220,7 +1220,7 @@ define amdgpu_kernel void @kern_align32_global_ptr(i8 addrspace(1)* align 1024 %
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_align32_global_ptr(
-; MESA-NEXT:    [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
 ; MESA-NEXT:    [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !align !3
@@ -1233,12 +1233,12 @@ define amdgpu_kernel void @kern_align32_global_ptr(i8 addrspace(1)* align 1024 %
 
 define amdgpu_kernel void @kern_noalias_global_ptr(i8 addrspace(1)* noalias %ptr) #0 {
 ; HSA-LABEL: @kern_noalias_global_ptr(
-; HSA-NEXT:    [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_noalias_global_ptr(
-; MESA-NEXT:    [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8
 ; MESA-NEXT:    ret void
 ;
@@ -1248,13 +1248,13 @@ define amdgpu_kernel void @kern_noalias_global_ptr(i8 addrspace(1)* noalias %ptr
 
 define amdgpu_kernel void @kern_noalias_global_ptr_x2(i8 addrspace(1)* noalias %ptr0, i8 addrspace(1)* noalias %ptr1) #0 {
 ; HSA-LABEL: @kern_noalias_global_ptr_x2(
-; HSA-NEXT:    [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8
 ; HSA-NEXT:    store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_noalias_global_ptr_x2(
-; MESA-NEXT:    [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8
 ; MESA-NEXT:    store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8
 ; MESA-NEXT:    ret void
@@ -1267,7 +1267,7 @@ define amdgpu_kernel void @kern_noalias_global_ptr_x2(i8 addrspace(1)* noalias %
 define amdgpu_kernel void @struct_i8_i8_arg({i8, i8} %in) #0 {
 ; HSA-LABEL: @struct_i8_i8_arg(
 ; HSA-NEXT:  entry:
-; HSA-NEXT:    [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i8 } addrspace(4)*
 ; HSA-NEXT:    [[IN_LOAD:%.*]] = load { i8, i8 }, { i8, i8 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1279,7 +1279,7 @@ define amdgpu_kernel void @struct_i8_i8_arg({i8, i8} %in) #0 {
 ;
 ; MESA-LABEL: @struct_i8_i8_arg(
 ; MESA-NEXT:  entry:
-; MESA-NEXT:    [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i8 } addrspace(4)*
 ; MESA-NEXT:    [[IN_LOAD:%.*]] = load { i8, i8 }, { i8, i8 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1300,7 +1300,7 @@ entry:
 define amdgpu_kernel void @struct_i8_i16_arg({i8, i16} %in) #0 {
 ; HSA-LABEL: @struct_i8_i16_arg(
 ; HSA-NEXT:  entry:
-; HSA-NEXT:    [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i16 } addrspace(4)*
 ; HSA-NEXT:    [[IN_LOAD:%.*]] = load { i8, i16 }, { i8, i16 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1312,7 +1312,7 @@ define amdgpu_kernel void @struct_i8_i16_arg({i8, i16} %in) #0 {
 ;
 ; MESA-LABEL: @struct_i8_i16_arg(
 ; MESA-NEXT:  entry:
-; MESA-NEXT:    [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i16 } addrspace(4)*
 ; MESA-NEXT:    [[IN_LOAD:%.*]] = load { i8, i16 }, { i8, i16 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1333,7 +1333,7 @@ entry:
 define amdgpu_kernel void @array_2xi8_arg([2 x i8] %in) #0 {
 ; HSA-LABEL: @array_2xi8_arg(
 ; HSA-NEXT:  entry:
-; HSA-NEXT:    [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i8] addrspace(4)*
 ; HSA-NEXT:    [[IN_LOAD:%.*]] = load [2 x i8], [2 x i8] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1345,7 +1345,7 @@ define amdgpu_kernel void @array_2xi8_arg([2 x i8] %in) #0 {
 ;
 ; MESA-LABEL: @array_2xi8_arg(
 ; MESA-NEXT:  entry:
-; MESA-NEXT:    [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i8] addrspace(4)*
 ; MESA-NEXT:    [[IN_LOAD:%.*]] = load [2 x i8], [2 x i8] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1366,7 +1366,7 @@ entry:
 define amdgpu_kernel void @array_2xi1_arg([2 x i1] %in) #0 {
 ; HSA-LABEL: @array_2xi1_arg(
 ; HSA-NEXT:  entry:
-; HSA-NEXT:    [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i1] addrspace(4)*
 ; HSA-NEXT:    [[IN_LOAD:%.*]] = load [2 x i1], [2 x i1] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1378,7 +1378,7 @@ define amdgpu_kernel void @array_2xi1_arg([2 x i1] %in) #0 {
 ;
 ; MESA-LABEL: @array_2xi1_arg(
 ; MESA-NEXT:  entry:
-; MESA-NEXT:    [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i1] addrspace(4)*
 ; MESA-NEXT:    [[IN_LOAD:%.*]] = load [2 x i1], [2 x i1] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1398,11 +1398,11 @@ entry:
 
 define amdgpu_kernel void @only_empty_struct({} %empty) #0 {
 ; HSA-LABEL: @only_empty_struct(
-; HSA-NEXT:    [[ONLY_EMPTY_STRUCT_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(256) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[ONLY_EMPTY_STRUCT_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(56) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @only_empty_struct(
-; MESA-NEXT:    [[ONLY_EMPTY_STRUCT_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(256) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[ONLY_EMPTY_STRUCT_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(56) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    ret void
 ;
   ret void
@@ -1410,7 +1410,7 @@ define amdgpu_kernel void @only_empty_struct({} %empty) #0 {
 
 define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 {
 ; HSA-LABEL: @empty_struct_with_other(
-; HSA-NEXT:    [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1418,7 +1418,7 @@ define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 {
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @empty_struct_with_other(
-; MESA-NEXT:    [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1433,7 +1433,7 @@ define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 {
 define amdgpu_kernel void @static_alloca_kern_i32(i32 %arg0) {
 ; HSA-LABEL: @static_alloca_kern_i32(
 ; HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
-; HSA-NEXT:    [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1442,7 +1442,7 @@ define amdgpu_kernel void @static_alloca_kern_i32(i32 %arg0) {
 ;
 ; MESA-LABEL: @static_alloca_kern_i32(
 ; MESA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
-; MESA-NEXT:    [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1459,7 +1459,7 @@ define amdgpu_kernel void @static_alloca_kern_i32(i32 %arg0) {
 define amdgpu_kernel void @dyn_alloca_kernarg_i32(i32 %n) {
 ; HSA-LABEL: @dyn_alloca_kernarg_i32(
 ; HSA-NEXT:    [[ALLOCA0:%.*]] = alloca i32, align 4, addrspace(5)
-; HSA-NEXT:    [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[N_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[N_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[N_KERNARG_OFFSET]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[N_LOAD:%.*]] = load i32, i32 addrspace(4)* [[N_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1470,7 +1470,7 @@ define amdgpu_kernel void @dyn_alloca_kernarg_i32(i32 %n) {
 ;
 ; MESA-LABEL: @dyn_alloca_kernarg_i32(
 ; MESA-NEXT:    [[ALLOCA0:%.*]] = alloca i32, align 4, addrspace(5)
-; MESA-NEXT:    [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[N_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[N_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[N_KERNARG_OFFSET]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[N_LOAD:%.*]] = load i32, i32 addrspace(4)* [[N_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1489,7 +1489,7 @@ define amdgpu_kernel void @dyn_alloca_kernarg_i32(i32 %n) {
 ; Byref pointers should only be treated as offsets from kernarg
 define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %out, i8 addrspace(4)* byref(i8) %in.byref) {
 ; HSA-LABEL: @byref_constant_i8_arg(
-; HSA-NEXT:    [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; HSA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1500,7 +1500,7 @@ define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %ou
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @byref_constant_i8_arg(
-; MESA-NEXT:    [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; MESA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1518,7 +1518,7 @@ define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %ou
 
 define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %out, i16 addrspace(4)* byref(i16) %in.byref) {
 ; HSA-LABEL: @byref_constant_i16_arg(
-; HSA-NEXT:    [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; HSA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1530,7 +1530,7 @@ define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %o
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @byref_constant_i16_arg(
-; MESA-NEXT:    [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; MESA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1549,7 +1549,7 @@ define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %o
 
 define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) %in.byref, i32 %after.offset) {
 ; HSA-LABEL: @byref_constant_i32_arg(
-; HSA-NEXT:    [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; HSA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1564,7 +1564,7 @@ define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %o
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @byref_constant_i32_arg(
-; MESA-NEXT:    [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; MESA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1586,7 +1586,7 @@ define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %o
 
 define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> addrspace(4)* byref(<4 x i32>) %in.byref, i32 %after.offset) {
 ; HSA-LABEL: @byref_constant_v4i32_arg(
-; HSA-NEXT:    [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(296) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(96) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to <4 x i32> addrspace(1)* addrspace(4)*
 ; HSA-NEXT:    [[OUT_LOAD:%.*]] = load <4 x i32> addrspace(1)*, <4 x i32> addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1602,7 +1602,7 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* noca
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @byref_constant_v4i32_arg(
-; MESA-NEXT:    [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(292) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(92) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to <4 x i32> addrspace(1)* addrspace(4)*
 ; MESA-NEXT:    [[OUT_LOAD:%.*]] = load <4 x i32> addrspace(1)*, <4 x i32> addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1626,7 +1626,7 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* noca
 
 define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align(256) %in.byref, i32 %after.offset) {
 ; HSA-LABEL: @byref_align_constant_i32_arg(
-; HSA-NEXT:    [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 256 dereferenceable(520) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 256 dereferenceable(320) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; HSA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1641,7 +1641,7 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapt
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @byref_align_constant_i32_arg(
-; MESA-NEXT:    [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 256 dereferenceable(520) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 256 dereferenceable(320) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; MESA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1663,7 +1663,7 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapt
 
 define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace(1)* nocapture %out, i8, <16 x i32> addrspace(4)* byref(<16 x i32>) %in.byref, i32 %after.offset) {
 ; HSA-LABEL: @byref_natural_align_constant_v16i32_arg(
-; HSA-NEXT:    [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(392) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(192) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; HSA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1679,7 +1679,7 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @byref_natural_align_constant_v16i32_arg(
-; MESA-NEXT:    [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(388) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(188) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; MESA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1704,7 +1704,7 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace
 ; Also accept byref kernel arguments with other global address spaces.
 define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* byref(i32) %in.byref) {
 ; HSA-LABEL: @byref_global_i32_arg(
-; HSA-NEXT:    [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; HSA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1715,7 +1715,7 @@ define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @byref_global_i32_arg(
-; MESA-NEXT:    [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; MESA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1732,7 +1732,7 @@ define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out
 
 define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out, i32* byref(i32) %in.byref) {
 ; HSA-LABEL: @byref_flat_i32_arg(
-; HSA-NEXT:    [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; HSA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1743,7 +1743,7 @@ define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out,
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @byref_flat_i32_arg(
-; MESA-NEXT:    [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; MESA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1760,7 +1760,7 @@ define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out,
 
 define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(6)* byref(i32) %in.byref) {
 ; HSA-LABEL: @byref_constant_32bit_i32_arg(
-; HSA-NEXT:    [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; HSA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1771,7 +1771,7 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapt
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @byref_constant_32bit_i32_arg(
-; MESA-NEXT:    [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; MESA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1788,7 +1788,7 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapt
 
 define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(999)* byref(i32) %in.byref) {
 ; HSA-LABEL: @byref_unknown_as_i32_arg(
-; HSA-NEXT:    [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; HSA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1799,7 +1799,7 @@ define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @byref_unknown_as_i32_arg(
-; MESA-NEXT:    [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; MESA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1817,7 +1817,7 @@ define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture
 ; Invalid, but should not crash.
 define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(3)* byref(i32) %in.byref) {
 ; HSA-LABEL: @byref_local_i32_arg(
-; HSA-NEXT:    [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(72) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; HSA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1828,7 +1828,7 @@ define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out,
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @byref_local_i32_arg(
-; MESA-NEXT:    [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; MESA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1845,7 +1845,7 @@ define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out,
 
 define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) %in0.byref, i32 addrspace(4)* byref(i32) %in1.byref, i32 %after.offset) {
 ; HSA-LABEL: @multi_byref_constant_i32_arg(
-; HSA-NEXT:    [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(280) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(80) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; HSA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
@@ -1864,7 +1864,7 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapt
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @multi_byref_constant_i32_arg(
-; MESA-NEXT:    [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(276) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(76) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[OUT_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[OUT_KERNARG_OFFSET]] to i32 addrspace(1)* addrspace(4)*
 ; MESA-NEXT:    [[OUT_LOAD:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[OUT_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
@@ -1892,7 +1892,7 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapt
 
 define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byref(i32) %in.byref) {
 ; HSA-LABEL: @byref_constant_i32_arg_offset0(
-; HSA-NEXT:    [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT:    [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[TMP1:%.*]] = bitcast i8 addrspace(4)* [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to i32 addrspace(4)*
 ; HSA-NEXT:    [[IN:%.*]] = load i32, i32 addrspace(4)* [[TMP1]], align 4
@@ -1900,7 +1900,7 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byre
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @byref_constant_i32_arg_offset0(
-; MESA-NEXT:    [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT:    [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(60) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 ; MESA-NEXT:    [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[TMP1:%.*]] = bitcast i8 addrspace(4)* [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to i32 addrspace(4)*
 ; MESA-NEXT:    [[IN:%.*]] = load i32, i32 addrspace(4)* [[TMP1]], align 4

diff  --git a/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll b/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll
index 2575fd4f3a33b..28facb38cff57 100644
--- a/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll
+++ b/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll
@@ -28,21 +28,20 @@ store i32 0, i32 addrspace(3)* @used_by_kernel
 define amdgpu_kernel void @withcall() {
 ; GFX9-LABEL: withcall:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT:    s_mov_b32 s14, -1
-; GFX9-NEXT:    s_mov_b32 s15, 0xe00000
-; GFX9-NEXT:    s_add_u32 s12, s12, s3
-; GFX9-NEXT:    s_addc_u32 s13, s13, 0
-; GFX9-NEXT:    s_add_u32 s8, s0, 36
-; GFX9-NEXT:    s_addc_u32 s9, s1, 0
-; GFX9-NEXT:    s_getpc_b64 s[0:1]
-; GFX9-NEXT:    s_add_u32 s0, s0, nonkernel at gotpcrel32@lo+4
-; GFX9-NEXT:    s_addc_u32 s1, s1, nonkernel at gotpcrel32@hi+12
-; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX9-NEXT:    s_mov_b64 s[0:1], s[12:13]
+; GFX9-NEXT:    s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT:    s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT:    s_mov_b32 s10, -1
+; GFX9-NEXT:    s_mov_b32 s11, 0xe00000
+; GFX9-NEXT:    s_add_u32 s8, s8, s3
+; GFX9-NEXT:    s_addc_u32 s9, s9, 0
+; GFX9-NEXT:    s_getpc_b64 s[2:3]
+; GFX9-NEXT:    s_add_u32 s2, s2, nonkernel at gotpcrel32@lo+4
+; GFX9-NEXT:    s_addc_u32 s3, s3, nonkernel at gotpcrel32@hi+12
+; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
+; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GFX9-NEXT:    s_mov_b64 s[0:1], s[8:9]
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9-NEXT:    s_mov_b64 s[2:3], s[14:15]
+; GFX9-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
 ; GFX9-NEXT:    ds_write_b32 v0, v0 offset:8
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -51,21 +50,20 @@ define amdgpu_kernel void @withcall() {
 ;
 ; GFX10-LABEL: withcall:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
-; GFX10-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
-; GFX10-NEXT:    s_mov_b32 s14, -1
-; GFX10-NEXT:    s_mov_b32 s15, 0x31c16000
-; GFX10-NEXT:    s_add_u32 s12, s12, s3
-; GFX10-NEXT:    s_addc_u32 s13, s13, 0
-; GFX10-NEXT:    s_add_u32 s8, s0, 36
-; GFX10-NEXT:    s_addc_u32 s9, s1, 0
-; GFX10-NEXT:    s_getpc_b64 s[0:1]
-; GFX10-NEXT:    s_add_u32 s0, s0, nonkernel at gotpcrel32@lo+4
-; GFX10-NEXT:    s_addc_u32 s1, s1, nonkernel at gotpcrel32@hi+12
+; GFX10-NEXT:    s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GFX10-NEXT:    s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GFX10-NEXT:    s_mov_b32 s10, -1
+; GFX10-NEXT:    s_mov_b32 s11, 0x31c16000
+; GFX10-NEXT:    s_add_u32 s8, s8, s3
+; GFX10-NEXT:    s_addc_u32 s9, s9, 0
+; GFX10-NEXT:    s_getpc_b64 s[2:3]
+; GFX10-NEXT:    s_add_u32 s2, s2, nonkernel at gotpcrel32@lo+4
+; GFX10-NEXT:    s_addc_u32 s3, s3, nonkernel at gotpcrel32@hi+12
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX10-NEXT:    s_mov_b64 s[0:1], s[12:13]
-; GFX10-NEXT:    s_mov_b64 s[2:3], s[14:15]
+; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
+; GFX10-NEXT:    s_mov_b64 s[6:7], s[0:1]
+; GFX10-NEXT:    s_mov_b64 s[0:1], s[8:9]
+; GFX10-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; GFX10-NEXT:    s_mov_b32 s32, 0
 ; GFX10-NEXT:    ds_write_b32 v0, v0 offset:8
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
@@ -74,22 +72,21 @@ define amdgpu_kernel void @withcall() {
 ;
 ; G_GFX9-LABEL: withcall:
 ; G_GFX9:       ; %bb.0:
-; G_GFX9-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
-; G_GFX9-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
-; G_GFX9-NEXT:    s_mov_b32 s14, -1
-; G_GFX9-NEXT:    s_mov_b32 s15, 0xe00000
-; G_GFX9-NEXT:    s_add_u32 s12, s12, s3
-; G_GFX9-NEXT:    s_addc_u32 s13, s13, 0
-; G_GFX9-NEXT:    s_add_u32 s8, s0, 36
-; G_GFX9-NEXT:    s_addc_u32 s9, s1, 0
+; G_GFX9-NEXT:    s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; G_GFX9-NEXT:    s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; G_GFX9-NEXT:    s_mov_b32 s10, -1
+; G_GFX9-NEXT:    s_mov_b32 s11, 0xe00000
+; G_GFX9-NEXT:    s_add_u32 s8, s8, s3
+; G_GFX9-NEXT:    s_addc_u32 s9, s9, 0
+; G_GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
 ; G_GFX9-NEXT:    s_getpc_b64 s[0:1]
 ; G_GFX9-NEXT:    s_add_u32 s0, s0, nonkernel at gotpcrel32@lo+4
 ; G_GFX9-NEXT:    s_addc_u32 s1, s1, nonkernel at gotpcrel32@hi+12
 ; G_GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
-; G_GFX9-NEXT:    s_mov_b64 s[0:1], s[12:13]
+; G_GFX9-NEXT:    s_mov_b64 s[0:1], s[8:9]
 ; G_GFX9-NEXT:    v_mov_b32_e32 v0, 0
 ; G_GFX9-NEXT:    v_mov_b32_e32 v1, 8
-; G_GFX9-NEXT:    s_mov_b64 s[2:3], s[14:15]
+; G_GFX9-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; G_GFX9-NEXT:    s_mov_b32 s32, 0
 ; G_GFX9-NEXT:    ds_write_b32 v1, v0
 ; G_GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -98,22 +95,21 @@ define amdgpu_kernel void @withcall() {
 ;
 ; G_GFX10-LABEL: withcall:
 ; G_GFX10:       ; %bb.0:
-; G_GFX10-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
-; G_GFX10-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
-; G_GFX10-NEXT:    s_mov_b32 s14, -1
-; G_GFX10-NEXT:    s_mov_b32 s15, 0x31c16000
-; G_GFX10-NEXT:    s_add_u32 s12, s12, s3
-; G_GFX10-NEXT:    s_addc_u32 s13, s13, 0
-; G_GFX10-NEXT:    s_add_u32 s8, s0, 36
-; G_GFX10-NEXT:    s_addc_u32 s9, s1, 0
+; G_GFX10-NEXT:    s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; G_GFX10-NEXT:    s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; G_GFX10-NEXT:    s_mov_b32 s10, -1
+; G_GFX10-NEXT:    s_mov_b32 s11, 0x31c16000
+; G_GFX10-NEXT:    s_add_u32 s8, s8, s3
+; G_GFX10-NEXT:    s_addc_u32 s9, s9, 0
+; G_GFX10-NEXT:    s_mov_b64 s[6:7], s[0:1]
 ; G_GFX10-NEXT:    s_getpc_b64 s[0:1]
 ; G_GFX10-NEXT:    s_add_u32 s0, s0, nonkernel at gotpcrel32@lo+4
 ; G_GFX10-NEXT:    s_addc_u32 s1, s1, nonkernel at gotpcrel32@hi+12
 ; G_GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; G_GFX10-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
 ; G_GFX10-NEXT:    v_mov_b32_e32 v1, 8
-; G_GFX10-NEXT:    s_mov_b64 s[0:1], s[12:13]
-; G_GFX10-NEXT:    s_mov_b64 s[2:3], s[14:15]
+; G_GFX10-NEXT:    s_mov_b64 s[0:1], s[8:9]
+; G_GFX10-NEXT:    s_mov_b64 s[2:3], s[10:11]
 ; G_GFX10-NEXT:    s_mov_b32 s32, 0
 ; G_GFX10-NEXT:    ds_write_b32 v1, v0
 ; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)

diff  --git a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
index 3cc077d38559b..9df93bc970a96 100644
--- a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
@@ -1,7 +1,4 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-
-; XFAIL: *
-
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=DEFAULTSIZE,MUBUF %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdhsa-code-object-version=5 < %s | FileCheck -check-prefixes=DEFAULTSIZE-V5,MUBUF %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-assume-dynamic-stack-object-size=1024 < %s | FileCheck -check-prefixes=ASSUME1024,MUBUF %s
@@ -54,42 +51,6 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
 ; MUBUF-NEXT:    s_endpgm
 ;
-; DEFAULTSIZE-V5-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align64:
-; DEFAULTSIZE-V5:       ; %bb.0: ; %entry
-; DEFAULTSIZE-V5-NEXT:    s_add_u32 s0, s0, s9
-; DEFAULTSIZE-V5-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x8
-; DEFAULTSIZE-V5-NEXT:    s_addc_u32 s1, s1, 0
-; DEFAULTSIZE-V5-NEXT:    s_movk_i32 s32, 0x400
-; DEFAULTSIZE-V5-NEXT:    s_mov_b32 s33, 0
-; DEFAULTSIZE-V5-NEXT:    s_waitcnt lgkmcnt(0)
-; DEFAULTSIZE-V5-NEXT:    s_cmp_lg_u32 s8, 0
-; DEFAULTSIZE-V5-NEXT:    s_cbranch_scc1 .LBB0_3
-; DEFAULTSIZE-V5-NEXT:  ; %bb.1: ; %bb.0
-; DEFAULTSIZE-V5-NEXT:    s_cmp_lg_u32 s9, 0
-; DEFAULTSIZE-V5-NEXT:    s_cbranch_scc1 .LBB0_3
-; DEFAULTSIZE-V5-NEXT:  ; %bb.2: ; %bb.1
-; DEFAULTSIZE-V5-NEXT:    s_add_i32 s6, s32, 0x1000
-; DEFAULTSIZE-V5-NEXT:    s_lshl_b32 s7, s10, 2
-; DEFAULTSIZE-V5-NEXT:    s_mov_b32 s32, s6
-; DEFAULTSIZE-V5-NEXT:    v_mov_b32_e32 v1, 0
-; DEFAULTSIZE-V5-NEXT:    v_mov_b32_e32 v2, s6
-; DEFAULTSIZE-V5-NEXT:    v_mov_b32_e32 v3, 1
-; DEFAULTSIZE-V5-NEXT:    s_add_i32 s6, s6, s7
-; DEFAULTSIZE-V5-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
-; DEFAULTSIZE-V5-NEXT:    buffer_store_dword v3, v2, s[0:3], 0 offen offset:4
-; DEFAULTSIZE-V5-NEXT:    v_mov_b32_e32 v2, s6
-; DEFAULTSIZE-V5-NEXT:    buffer_load_dword v2, v2, s[0:3], 0 offen
-; DEFAULTSIZE-V5-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
-; DEFAULTSIZE-V5-NEXT:    s_waitcnt vmcnt(0)
-; DEFAULTSIZE-V5-NEXT:    v_add_u32_e32 v0, v2, v0
-; DEFAULTSIZE-V5-NEXT:    s_waitcnt lgkmcnt(0)
-; DEFAULTSIZE-V5-NEXT:    global_store_dword v1, v0, s[4:5]
-; DEFAULTSIZE-V5-NEXT:  .LBB0_3: ; %bb.2
-; DEFAULTSIZE-V5-NEXT:    v_mov_b32_e32 v0, 0
-; DEFAULTSIZE-V5-NEXT:    global_store_dword v[0:1], v0, off
-; DEFAULTSIZE-V5-NEXT:    s_waitcnt vmcnt(0)
-; DEFAULTSIZE-V5-NEXT:    s_endpgm
-;
 ; FLATSCR-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align4:
 ; FLATSCR:       ; %bb.0: ; %entry
 ; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s2, s5
@@ -149,8 +110,8 @@ bb.2:
   store volatile i32 0, i32 addrspace(1)* undef
   ret void
 }
-; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 16
-; DEFAULTSIZE: ; ScratchSize: 16
+; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4112
+; DEFAULTSIZE: ; ScratchSize: 4112
 ; DEFAULTSIZE-V5: .amdhsa_private_segment_fixed_size 16
 ; DEFAULTSIZE-V5: .amdhsa_uses_dynamic_stack 1
 ; DEFAULTSIZE-V5: ; ScratchSize: 16
@@ -193,40 +154,6 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
 ; MUBUF-NEXT:    s_endpgm
 ;
-; DEFAULTSIZE-V5-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align64:
-; DEFAULTSIZE-V5:       ; %bb.0: ; %entry
-; DEFAULTSIZE-V5-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x8
-; DEFAULTSIZE-V5-NEXT:    s_add_u32 s0, s0, s9
-; DEFAULTSIZE-V5-NEXT:    s_addc_u32 s1, s1, 0
-; DEFAULTSIZE-V5-NEXT:    s_movk_i32 s32, 0x1000
-; DEFAULTSIZE-V5-NEXT:    s_mov_b32 s33, 0
-; DEFAULTSIZE-V5-NEXT:    s_waitcnt lgkmcnt(0)
-; DEFAULTSIZE-V5-NEXT:    s_cmp_lg_u32 s6, 0
-; DEFAULTSIZE-V5-NEXT:    s_cbranch_scc1 .LBB1_2
-; DEFAULTSIZE-V5-NEXT:  ; %bb.1: ; %bb.0
-; DEFAULTSIZE-V5-NEXT:    s_add_i32 s6, s32, 0x1000
-; DEFAULTSIZE-V5-NEXT:    s_and_b32 s6, s6, 0xfffff000
-; DEFAULTSIZE-V5-NEXT:    s_lshl_b32 s7, s7, 2
-; DEFAULTSIZE-V5-NEXT:    s_mov_b32 s32, s6
-; DEFAULTSIZE-V5-NEXT:    v_mov_b32_e32 v1, 0
-; DEFAULTSIZE-V5-NEXT:    v_mov_b32_e32 v2, s6
-; DEFAULTSIZE-V5-NEXT:    v_mov_b32_e32 v3, 1
-; DEFAULTSIZE-V5-NEXT:    s_add_i32 s6, s6, s7
-; DEFAULTSIZE-V5-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
-; DEFAULTSIZE-V5-NEXT:    buffer_store_dword v3, v2, s[0:3], 0 offen offset:4
-; DEFAULTSIZE-V5-NEXT:    v_mov_b32_e32 v2, s6
-; DEFAULTSIZE-V5-NEXT:    buffer_load_dword v2, v2, s[0:3], 0 offen
-; DEFAULTSIZE-V5-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
-; DEFAULTSIZE-V5-NEXT:    s_waitcnt vmcnt(0)
-; DEFAULTSIZE-V5-NEXT:    v_add_u32_e32 v0, v2, v0
-; DEFAULTSIZE-V5-NEXT:    s_waitcnt lgkmcnt(0)
-; DEFAULTSIZE-V5-NEXT:    global_store_dword v1, v0, s[4:5]
-; DEFAULTSIZE-V5-NEXT:  .LBB1_2: ; %bb.1
-; DEFAULTSIZE-V5-NEXT:    v_mov_b32_e32 v0, 0
-; DEFAULTSIZE-V5-NEXT:    global_store_dword v[0:1], v0, off
-; DEFAULTSIZE-V5-NEXT:    s_waitcnt vmcnt(0)
-; DEFAULTSIZE-V5-NEXT:    s_endpgm
-;
 ; FLATSCR-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align64:
 ; FLATSCR:       ; %bb.0: ; %entry
 ; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s2, s5
@@ -279,8 +206,8 @@ bb.1:
   ret void
 }
 
-; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 64
-; DEFAULTSIZE: ; ScratchSize: 64
+; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4160
+; DEFAULTSIZE: ; ScratchSize: 4160
 ; DEFAULTSIZE-V5: .amdhsa_private_segment_fixed_size 64
 ; DEFAULTSIZE-V5: .amdhsa_uses_dynamic_stack 1
 ; DEFAULTSIZE-V5: ; ScratchSize: 64
@@ -326,42 +253,6 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i
 ; MUBUF-NEXT:    s_mov_b32 s33, s7
 ; MUBUF-NEXT:    s_setpc_b64 s[30:31]
 ;
-; DEFAULTSIZE-V5-LABEL: func_non_entry_block_static_alloca_align4:
-; DEFAULTSIZE-V5:       ; %bb.0: ; %entry
-; DEFAULTSIZE-V5-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; DEFAULTSIZE-V5-NEXT:    s_mov_b32 s7, s33
-; DEFAULTSIZE-V5-NEXT:    s_mov_b32 s33, s32
-; DEFAULTSIZE-V5-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
-; DEFAULTSIZE-V5-NEXT:    s_addk_i32 s32, 0x400
-; DEFAULTSIZE-V5-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; DEFAULTSIZE-V5-NEXT:    s_cbranch_execz .LBB2_3
-; DEFAULTSIZE-V5-NEXT:  ; %bb.1: ; %bb.0
-; DEFAULTSIZE-V5-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
-; DEFAULTSIZE-V5-NEXT:    s_and_b64 exec, exec, vcc
-; DEFAULTSIZE-V5-NEXT:    s_cbranch_execz .LBB2_3
-; DEFAULTSIZE-V5-NEXT:  ; %bb.2: ; %bb.1
-; DEFAULTSIZE-V5-NEXT:    s_add_i32 s6, s32, 0x1000
-; DEFAULTSIZE-V5-NEXT:    v_mov_b32_e32 v2, 0
-; DEFAULTSIZE-V5-NEXT:    v_mov_b32_e32 v3, s6
-; DEFAULTSIZE-V5-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen
-; DEFAULTSIZE-V5-NEXT:    v_mov_b32_e32 v2, 1
-; DEFAULTSIZE-V5-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen offset:4
-; DEFAULTSIZE-V5-NEXT:    v_lshl_add_u32 v2, v4, 2, s6
-; DEFAULTSIZE-V5-NEXT:    buffer_load_dword v2, v2, s[0:3], 0 offen
-; DEFAULTSIZE-V5-NEXT:    v_and_b32_e32 v3, 0x3ff, v31
-; DEFAULTSIZE-V5-NEXT:    s_mov_b32 s32, s6
-; DEFAULTSIZE-V5-NEXT:    s_waitcnt vmcnt(0)
-; DEFAULTSIZE-V5-NEXT:    v_add_u32_e32 v2, v2, v3
-; DEFAULTSIZE-V5-NEXT:    global_store_dword v[0:1], v2, off
-; DEFAULTSIZE-V5-NEXT:  .LBB2_3: ; %bb.2
-; DEFAULTSIZE-V5-NEXT:    s_or_b64 exec, exec, s[4:5]
-; DEFAULTSIZE-V5-NEXT:    v_mov_b32_e32 v0, 0
-; DEFAULTSIZE-V5-NEXT:    global_store_dword v[0:1], v0, off
-; DEFAULTSIZE-V5-NEXT:    s_waitcnt vmcnt(0)
-; DEFAULTSIZE-V5-NEXT:    s_addk_i32 s32, 0xfc00
-; DEFAULTSIZE-V5-NEXT:    s_mov_b32 s33, s7
-; DEFAULTSIZE-V5-NEXT:    s_setpc_b64 s[30:31]
-;
 ; FLATSCR-LABEL: func_non_entry_block_static_alloca_align4:
 ; FLATSCR:       ; %bb.0: ; %entry
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -458,40 +349,6 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out,
 ; MUBUF-NEXT:    s_mov_b32 s33, s7
 ; MUBUF-NEXT:    s_setpc_b64 s[30:31]
 ;
-; DEFAULTSIZE-V5-LABEL: func_non_entry_block_static_alloca_align64:
-; DEFAULTSIZE-V5:       ; %bb.0: ; %entry
-; DEFAULTSIZE-V5-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; DEFAULTSIZE-V5-NEXT:    s_mov_b32 s7, s33
-; DEFAULTSIZE-V5-NEXT:    s_add_i32 s33, s32, 0xfc0
-; DEFAULTSIZE-V5-NEXT:    s_and_b32 s33, s33, 0xfffff000
-; DEFAULTSIZE-V5-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
-; DEFAULTSIZE-V5-NEXT:    s_addk_i32 s32, 0x2000
-; DEFAULTSIZE-V5-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; DEFAULTSIZE-V5-NEXT:    s_cbranch_execz .LBB3_2
-; DEFAULTSIZE-V5-NEXT:  ; %bb.1: ; %bb.0
-; DEFAULTSIZE-V5-NEXT:    s_add_i32 s6, s32, 0x1000
-; DEFAULTSIZE-V5-NEXT:    s_and_b32 s6, s6, 0xfffff000
-; DEFAULTSIZE-V5-NEXT:    v_mov_b32_e32 v2, 0
-; DEFAULTSIZE-V5-NEXT:    v_mov_b32_e32 v4, s6
-; DEFAULTSIZE-V5-NEXT:    buffer_store_dword v2, v4, s[0:3], 0 offen
-; DEFAULTSIZE-V5-NEXT:    v_mov_b32_e32 v2, 1
-; DEFAULTSIZE-V5-NEXT:    buffer_store_dword v2, v4, s[0:3], 0 offen offset:4
-; DEFAULTSIZE-V5-NEXT:    v_lshl_add_u32 v2, v3, 2, s6
-; DEFAULTSIZE-V5-NEXT:    buffer_load_dword v2, v2, s[0:3], 0 offen
-; DEFAULTSIZE-V5-NEXT:    v_and_b32_e32 v3, 0x3ff, v31
-; DEFAULTSIZE-V5-NEXT:    s_mov_b32 s32, s6
-; DEFAULTSIZE-V5-NEXT:    s_waitcnt vmcnt(0)
-; DEFAULTSIZE-V5-NEXT:    v_add_u32_e32 v2, v2, v3
-; DEFAULTSIZE-V5-NEXT:    global_store_dword v[0:1], v2, off
-; DEFAULTSIZE-V5-NEXT:  .LBB3_2: ; %bb.1
-; DEFAULTSIZE-V5-NEXT:    s_or_b64 exec, exec, s[4:5]
-; DEFAULTSIZE-V5-NEXT:    v_mov_b32_e32 v0, 0
-; DEFAULTSIZE-V5-NEXT:    global_store_dword v[0:1], v0, off
-; DEFAULTSIZE-V5-NEXT:    s_waitcnt vmcnt(0)
-; DEFAULTSIZE-V5-NEXT:    s_addk_i32 s32, 0xe000
-; DEFAULTSIZE-V5-NEXT:    s_mov_b32 s33, s7
-; DEFAULTSIZE-V5-NEXT:    s_setpc_b64 s[30:31]
-;
 ; FLATSCR-LABEL: func_non_entry_block_static_alloca_align64:
 ; FLATSCR:       ; %bb.0: ; %entry
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -549,6 +406,3 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 attributes #0 = { nounwind readnone speculatable }
 attributes #1 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; ASSUME1024: {{.*}}
-; DEFAULTSIZE: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
index cdeaa6af61ae8..ec83d7f313d6a 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
@@ -77,7 +77,7 @@ declare i32 @foo(ptr addrspace(5)) #0
 ; ASM: buffer_store_dword
 ; ASM: buffer_store_dword
 ; ASM: s_swappc_b64
-; ASM: ScratchSize: 16
+; ASM: ScratchSize: 16400
 define amdgpu_kernel void @call_private(ptr addrspace(1) %out, i32 %in) #0 {
 entry:
   %tmp = alloca [2 x i32], addrspace(5)

diff  --git a/llvm/test/CodeGen/AMDGPU/recursion.ll b/llvm/test/CodeGen/AMDGPU/recursion.ll
index 484e3432c4d0f..9aedfad6fe320 100644
--- a/llvm/test/CodeGen/AMDGPU/recursion.ll
+++ b/llvm/test/CodeGen/AMDGPU/recursion.ll
@@ -32,7 +32,7 @@ define void @tail_recursive_with_stack() {
 ; For an arbitrary recursive call, report a large number for unknown stack
 ; usage for code object v4 and older
 ; CHECK-LABEL: {{^}}calls_recursive:
-; CHECK: .amdhsa_private_segment_fixed_size 16{{$}}
+; CHECK: .amdhsa_private_segment_fixed_size 16400{{$}}
 ;
 ; V5-LABEL: {{^}}calls_recursive:
 ; V5: .amdhsa_private_segment_fixed_size 0{{$}}
@@ -56,7 +56,7 @@ define amdgpu_kernel void @kernel_indirectly_calls_tail_recursive() {
 ; in the kernel.
 
 ; CHECK-LABEL: {{^}}kernel_calls_tail_recursive:
-; CHECK: .amdhsa_private_segment_fixed_size 0{{$}}
+; CHECK: .amdhsa_private_segment_fixed_size 16384{{$}}
 ;
 ; V5-LABEL: {{^}}kernel_calls_tail_recursive:
 ; V5: .amdhsa_private_segment_fixed_size 0{{$}}
@@ -67,7 +67,7 @@ define amdgpu_kernel void @kernel_calls_tail_recursive() {
 }
 
 ; CHECK-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
-; CHECK: .amdhsa_private_segment_fixed_size 8{{$}}
+; CHECK: .amdhsa_private_segment_fixed_size 16384{{$}}
 ;
 ; V5-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
 ; V5: .amdhsa_private_segment_fixed_size 8{{$}}

diff  --git a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
index 4812a8f157138..d1ec9d8afaaf9 100644
--- a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
@@ -1,5 +1,5 @@
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -S --amdhsa-code-object-version=4 -amdgpu-lower-kernel-attributes -instcombine %s | FileCheck -enable-var-scope %s
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -S --amdhsa-code-object-version=4 -passes=amdgpu-lower-kernel-attributes,instcombine %s | FileCheck -enable-var-scope %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -amdgpu-lower-kernel-attributes -instcombine %s | FileCheck -enable-var-scope %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=amdgpu-lower-kernel-attributes,instcombine %s | FileCheck -enable-var-scope %s
 
 target datalayout = "n32"
 

diff  --git a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
index 2ad1590072890..1c8b8be33b0ad 100644
--- a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
+++ b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
@@ -21,7 +21,7 @@ define internal fastcc void @unreachable() {
 ; GCN: s_endpgm
 
 ; GCN: .amdhsa_private_segment_fixed_size 0
-; GCN: .amdhsa_uses_dynamic_stack 0
+; GCN-NOT: .amdhsa_uses_dynamic_stack 0
 ; GCN-V5: .amdhsa_uses_dynamic_stack 0
 define amdgpu_kernel void @entry() {
 bb0:

diff  --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll
index e7148e2eb057c..b20f540cf2472 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll
@@ -13,19 +13,24 @@ define amdgpu_kernel void @kernel() {
 ; GCN-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
 ; GCN-NEXT:    s_mov_b32 s38, -1
 ; GCN-NEXT:    s_mov_b32 s39, 0xe00000
-; GCN-NEXT:    s_add_u32 s36, s36, s9
+; GCN-NEXT:    v_writelane_b32 v40, s4, 0
+; GCN-NEXT:    s_add_u32 s36, s36, s11
+; GCN-NEXT:    v_writelane_b32 v40, s5, 1
 ; GCN-NEXT:    s_addc_u32 s37, s37, 0
-; GCN-NEXT:    s_mov_b32 s14, s8
-; GCN-NEXT:    s_add_u32 s8, s2, 36
-; GCN-NEXT:    s_addc_u32 s9, s3, 0
-; GCN-NEXT:    s_mov_b64 s[10:11], s[4:5]
 ; GCN-NEXT:    s_mov_b64 s[4:5], s[0:1]
+; GCN-NEXT:    v_readlane_b32 s0, v40, 0
+; GCN-NEXT:    s_mov_b32 s13, s9
+; GCN-NEXT:    s_mov_b32 s12, s8
+; GCN-NEXT:    v_readlane_b32 s1, v40, 1
+; GCN-NEXT:    s_add_u32 s8, s0, 36
+; GCN-NEXT:    s_addc_u32 s9, s1, 0
 ; GCN-NEXT:    s_getpc_b64 s[0:1]
 ; GCN-NEXT:    s_add_u32 s0, s0, foo at gotpcrel32@lo+4
 ; GCN-NEXT:    s_addc_u32 s1, s1, foo at gotpcrel32@hi+12
-; GCN-NEXT:    s_mov_b32 s13, s7
-; GCN-NEXT:    s_mov_b32 s12, s6
-; GCN-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x0
+; GCN-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x0
+; GCN-NEXT:    s_mov_b32 s14, s10
+; GCN-NEXT:    s_mov_b64 s[10:11], s[6:7]
+; GCN-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GCN-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; GCN-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GCN-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -33,7 +38,7 @@ define amdgpu_kernel void @kernel() {
 ; GCN-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GCN-NEXT:    s_mov_b32 s32, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GCN-NEXT:    s_endpgm
   call void @foo()
   ret void

diff  --git a/llvm/test/CodeGen/AMDGPU/trap.ll b/llvm/test/CodeGen/AMDGPU/trap.ll
index 60940d57309a2..77b8b47d1f0b0 100644
--- a/llvm/test/CodeGen/AMDGPU/trap.ll
+++ b/llvm/test/CodeGen/AMDGPU/trap.ll
@@ -31,11 +31,11 @@ declare void @llvm.debugtrap() #1
 
 ; MESA-TRAP: .section .AMDGPU.config
 ; MESA-TRAP:  .long   47180
-; MESA-TRAP-NEXT: .long   204
+; MESA-TRAP-NEXT: .long   208
 
 ; NOMESA-TRAP: .section .AMDGPU.config
 ; NOMESA-TRAP:  .long   47180
-; NOMESA-TRAP-NEXT: .long   140
+; NOMESA-TRAP-NEXT: .long   144
 
 ; GCN-LABEL: {{^}}hsa_trap:
 ; HSA-TRAP: enable_trap_handler = 0

diff  --git a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
index 5db2644abc22d..f945a797b774c 100644
--- a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
+++ b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
@@ -30,59 +30,67 @@ bb:
 define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i, i32 %tmp5.i.i, i32 %tmp427.i, i1 %tmp438.i, double %tmp27.i, i1 %tmp48.i) {
 ; GLOBALNESS1-LABEL: kernel:
 ; GLOBALNESS1:       ; %bb.0: ; %bb
-; GLOBALNESS1-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GLOBALNESS1-NEXT:    s_load_dwordx4 s[84:87], s[6:7], 0x0
-; GLOBALNESS1-NEXT:    s_nop 0
-; GLOBALNESS1-NEXT:    s_load_dword s6, s[6:7], 0x14
-; GLOBALNESS1-NEXT:    v_mov_b32_e32 v41, v0
-; GLOBALNESS1-NEXT:    v_mov_b32_e32 v42, 0
+; GLOBALNESS1-NEXT:    s_mov_b64 s[54:55], s[6:7]
+; GLOBALNESS1-NEXT:    s_load_dwordx4 s[36:39], s[8:9], 0x0
+; GLOBALNESS1-NEXT:    s_load_dword s6, s[8:9], 0x14
+; GLOBALNESS1-NEXT:    v_mov_b32_e32 v42, v0
+; GLOBALNESS1-NEXT:    v_mov_b32_e32 v44, 0
 ; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[0:1], 0, 0
-; GLOBALNESS1-NEXT:    global_store_dword v[0:1], v42, off
+; GLOBALNESS1-NEXT:    global_store_dword v[0:1], v44, off
 ; GLOBALNESS1-NEXT:    s_waitcnt lgkmcnt(0)
-; GLOBALNESS1-NEXT:    global_load_dword v0, v42, s[84:85]
-; GLOBALNESS1-NEXT:    s_add_u32 flat_scratch_lo, s10, s15
-; GLOBALNESS1-NEXT:    s_mov_b64 s[36:37], s[4:5]
-; GLOBALNESS1-NEXT:    s_load_dwordx2 s[4:5], s[38:39], 0x18
-; GLOBALNESS1-NEXT:    s_load_dword s7, s[38:39], 0x20
-; GLOBALNESS1-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
-; GLOBALNESS1-NEXT:    s_add_u32 s0, s0, s15
+; GLOBALNESS1-NEXT:    global_load_dword v0, v44, s[36:37]
+; GLOBALNESS1-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; GLOBALNESS1-NEXT:    s_mov_b64 s[64:65], s[4:5]
+; GLOBALNESS1-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x18
+; GLOBALNESS1-NEXT:    s_load_dword s7, s[8:9], 0x20
+; GLOBALNESS1-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; GLOBALNESS1-NEXT:    s_add_u32 s0, s0, s17
 ; GLOBALNESS1-NEXT:    s_addc_u32 s1, s1, 0
-; GLOBALNESS1-NEXT:    v_mov_b32_e32 v43, 0x40994400
-; GLOBALNESS1-NEXT:    s_bitcmp1_b32 s86, 0
+; GLOBALNESS1-NEXT:    v_mov_b32_e32 v45, 0x40994400
+; GLOBALNESS1-NEXT:    s_bitcmp1_b32 s38, 0
 ; GLOBALNESS1-NEXT:    s_waitcnt lgkmcnt(0)
-; GLOBALNESS1-NEXT:    v_cmp_ngt_f64_e64 s[40:41], s[4:5], v[42:43]
-; GLOBALNESS1-NEXT:    v_cmp_ngt_f64_e64 s[90:91], s[4:5], 0
+; GLOBALNESS1-NEXT:    v_cmp_ngt_f64_e64 s[36:37], s[4:5], v[44:45]
+; GLOBALNESS1-NEXT:    v_cmp_ngt_f64_e64 s[40:41], s[4:5], 0
 ; GLOBALNESS1-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; GLOBALNESS1-NEXT:    s_xor_b64 s[92:93], s[4:5], -1
+; GLOBALNESS1-NEXT:    s_xor_b64 s[94:95], s[4:5], -1
 ; GLOBALNESS1-NEXT:    s_bitcmp1_b32 s6, 0
 ; GLOBALNESS1-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
 ; GLOBALNESS1-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; GLOBALNESS1-NEXT:    s_xor_b64 s[94:95], s[4:5], -1
+; GLOBALNESS1-NEXT:    s_xor_b64 s[88:89], s[4:5], -1
 ; GLOBALNESS1-NEXT:    s_bitcmp1_b32 s7, 0
 ; GLOBALNESS1-NEXT:    s_cselect_b64 s[4:5], -1, 0
 ; GLOBALNESS1-NEXT:    s_getpc_b64 s[6:7]
 ; GLOBALNESS1-NEXT:    s_add_u32 s6, s6, wobble at gotpcrel32@lo+4
 ; GLOBALNESS1-NEXT:    s_addc_u32 s7, s7, wobble at gotpcrel32@hi+12
-; GLOBALNESS1-NEXT:    s_load_dwordx2 s[98:99], s[6:7], 0x0
-; GLOBALNESS1-NEXT:    s_mov_b32 s60, s14
-; GLOBALNESS1-NEXT:    s_mov_b32 s61, s13
-; GLOBALNESS1-NEXT:    s_mov_b32 s62, s12
-; GLOBALNESS1-NEXT:    s_mov_b64 s[34:35], s[8:9]
-; GLOBALNESS1-NEXT:    s_mov_b64 s[88:89], 0x80
+; GLOBALNESS1-NEXT:    s_xor_b64 s[86:87], s[4:5], -1
+; GLOBALNESS1-NEXT:    s_load_dwordx2 s[66:67], s[6:7], 0x0
+; GLOBALNESS1-NEXT:    s_mov_b32 s98, s16
+; GLOBALNESS1-NEXT:    s_mov_b64 s[62:63], s[8:9]
+; GLOBALNESS1-NEXT:    s_mov_b32 s99, s15
+; GLOBALNESS1-NEXT:    s_mov_b32 s100, s14
+; GLOBALNESS1-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GLOBALNESS1-NEXT:    s_mov_b64 s[92:93], 0x80
 ; GLOBALNESS1-NEXT:    v_cmp_ne_u32_e64 s[42:43], 1, v1
-; GLOBALNESS1-NEXT:    s_xor_b64 s[96:97], s[4:5], -1
-; GLOBALNESS1-NEXT:    s_mov_b32 s65, 0x3ff00000
+; GLOBALNESS1-NEXT:    s_mov_b32 s69, 0x3ff00000
 ; GLOBALNESS1-NEXT:    s_mov_b32 s32, 0
 ; GLOBALNESS1-NEXT:    ; implicit-def: $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39_agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47_agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55_agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
 ; GLOBALNESS1-NEXT:    s_waitcnt vmcnt(0)
-; GLOBALNESS1-NEXT:    v_cmp_gt_i32_e64 s[44:45], 0, v0
-; GLOBALNESS1-NEXT:    v_cmp_gt_i32_e64 s[46:47], 1, v0
-; GLOBALNESS1-NEXT:    v_cmp_eq_u32_e64 s[48:49], 1, v0
-; GLOBALNESS1-NEXT:    v_cmp_eq_u32_e64 s[50:51], 0, v0
+; GLOBALNESS1-NEXT:    v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GLOBALNESS1-NEXT:    v_writelane_b32 v41, s4, 0
+; GLOBALNESS1-NEXT:    v_writelane_b32 v41, s5, 1
+; GLOBALNESS1-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v0
+; GLOBALNESS1-NEXT:    v_writelane_b32 v41, s4, 2
+; GLOBALNESS1-NEXT:    v_writelane_b32 v41, s5, 3
+; GLOBALNESS1-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GLOBALNESS1-NEXT:    v_writelane_b32 v41, s4, 4
+; GLOBALNESS1-NEXT:    v_cmp_gt_i32_e64 s[90:91], 1, v0
+; GLOBALNESS1-NEXT:    v_writelane_b32 v41, s5, 5
 ; GLOBALNESS1-NEXT:    s_branch .LBB1_4
 ; GLOBALNESS1-NEXT:  .LBB1_1: ; %bb70.i
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[50:51]
+; GLOBALNESS1-NEXT:    v_readlane_b32 s6, v41, 4
+; GLOBALNESS1-NEXT:    v_readlane_b32 s7, v41, 5
+; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[6:7]
 ; GLOBALNESS1-NEXT:    s_cbranch_vccz .LBB1_29
 ; GLOBALNESS1-NEXT:  .LBB1_2: ; %Flow6
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_4 Depth=1
@@ -128,31 +136,32 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
 ; GLOBALNESS1-NEXT:  .LBB1_4: ; %bb5
 ; GLOBALNESS1-NEXT:    ; =>This Loop Header: Depth=1
 ; GLOBALNESS1-NEXT:    ; Child Loop BB1_15 Depth 2
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[0:1], s[88:89], s[88:89] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[0:1], s[92:93], s[92:93] op_sel:[0,1]
 ; GLOBALNESS1-NEXT:    flat_load_dword v40, v[0:1]
-; GLOBALNESS1-NEXT:    s_add_u32 s8, s38, 40
-; GLOBALNESS1-NEXT:    buffer_store_dword v42, off, s[0:3], 0
-; GLOBALNESS1-NEXT:    flat_load_dword v44, v[0:1]
-; GLOBALNESS1-NEXT:    s_addc_u32 s9, s39, 0
-; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], s[36:37]
+; GLOBALNESS1-NEXT:    s_add_u32 s8, s62, 40
+; GLOBALNESS1-NEXT:    buffer_store_dword v44, off, s[0:3], 0
+; GLOBALNESS1-NEXT:    flat_load_dword v43, v[0:1]
+; GLOBALNESS1-NEXT:    s_addc_u32 s9, s63, 0
+; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], s[64:65]
+; GLOBALNESS1-NEXT:    s_mov_b64 s[6:7], s[54:55]
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GLOBALNESS1-NEXT:    s_mov_b32 s12, s62
-; GLOBALNESS1-NEXT:    s_mov_b32 s13, s61
-; GLOBALNESS1-NEXT:    s_mov_b32 s14, s60
-; GLOBALNESS1-NEXT:    v_mov_b32_e32 v31, v41
+; GLOBALNESS1-NEXT:    s_mov_b32 s12, s100
+; GLOBALNESS1-NEXT:    s_mov_b32 s13, s99
+; GLOBALNESS1-NEXT:    s_mov_b32 s14, s98
+; GLOBALNESS1-NEXT:    v_mov_b32_e32 v31, v42
 ; GLOBALNESS1-NEXT:    s_waitcnt lgkmcnt(0)
-; GLOBALNESS1-NEXT:    s_swappc_b64 s[30:31], s[98:99]
+; GLOBALNESS1-NEXT:    s_swappc_b64 s[30:31], s[66:67]
 ; GLOBALNESS1-NEXT:    s_and_b64 vcc, exec, s[42:43]
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[6:7], -1
 ; GLOBALNESS1-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GLOBALNESS1-NEXT:    s_cbranch_vccnz .LBB1_8
 ; GLOBALNESS1-NEXT:  ; %bb.5: ; %NodeBlock
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT:    s_cmp_lt_i32 s87, 1
+; GLOBALNESS1-NEXT:    s_cmp_lt_i32 s39, 1
 ; GLOBALNESS1-NEXT:    s_cbranch_scc1 .LBB1_7
 ; GLOBALNESS1-NEXT:  ; %bb.6: ; %LeafBlock3
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT:    s_cmp_lg_u32 s87, 1
+; GLOBALNESS1-NEXT:    s_cmp_lg_u32 s39, 1
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], -1
 ; GLOBALNESS1-NEXT:    s_cselect_b64 s[6:7], -1, 0
 ; GLOBALNESS1-NEXT:    s_cbranch_execnz .LBB1_8
@@ -169,124 +178,130 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_4 Depth=1
 ; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[32:33], 0, 0
 ; GLOBALNESS1-NEXT:    flat_load_dword v0, v[32:33]
-; GLOBALNESS1-NEXT:    s_mov_b32 s64, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s66, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s67, s65
-; GLOBALNESS1-NEXT:    s_mov_b32 s68, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s69, s65
-; GLOBALNESS1-NEXT:    s_mov_b32 s70, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s71, s65
-; GLOBALNESS1-NEXT:    s_mov_b32 s72, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s73, s65
-; GLOBALNESS1-NEXT:    s_mov_b32 s74, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s75, s65
-; GLOBALNESS1-NEXT:    s_mov_b32 s76, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s77, s65
-; GLOBALNESS1-NEXT:    s_mov_b32 s78, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s79, s65
-; GLOBALNESS1-NEXT:    s_mov_b32 s80, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s81, s65
+; GLOBALNESS1-NEXT:    s_mov_b32 s68, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s70, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s71, s69
+; GLOBALNESS1-NEXT:    s_mov_b32 s72, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s73, s69
+; GLOBALNESS1-NEXT:    s_mov_b32 s74, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s75, s69
+; GLOBALNESS1-NEXT:    s_mov_b32 s76, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s77, s69
+; GLOBALNESS1-NEXT:    s_mov_b32 s78, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s79, s69
+; GLOBALNESS1-NEXT:    s_mov_b32 s80, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s81, s69
+; GLOBALNESS1-NEXT:    s_mov_b32 s82, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s83, s69
+; GLOBALNESS1-NEXT:    s_mov_b32 s84, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s85, s69
 ; GLOBALNESS1-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GLOBALNESS1-NEXT:    v_cmp_gt_i32_e64 s[52:53], 0, v0
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[0:1], s[64:65], s[64:65] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[2:3], s[66:67], s[66:67] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[4:5], s[68:69], s[68:69] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[6:7], s[70:71], s[70:71] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[8:9], s[72:73], s[72:73] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[10:11], s[74:75], s[74:75] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[12:13], s[76:77], s[76:77] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[14:15], s[78:79], s[78:79] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[16:17], s[80:81], s[80:81] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[18:19], s[82:83], s[82:83] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[20:21], s[84:85], s[84:85] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[22:23], s[86:87], s[86:87] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[24:25], s[88:89], s[88:89] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[26:27], s[90:91], s[90:91] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[28:29], s[92:93], s[92:93] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[30:31], s[94:95], s[94:95] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    s_and_saveexec_b64 s[66:67], s[52:53]
+; GLOBALNESS1-NEXT:    v_cmp_gt_i32_e64 s[96:97], 0, v0
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[0:1], s[68:69], s[68:69] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[2:3], s[70:71], s[70:71] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[4:5], s[72:73], s[72:73] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[6:7], s[74:75], s[74:75] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[8:9], s[76:77], s[76:77] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[10:11], s[78:79], s[78:79] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[12:13], s[80:81], s[80:81] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[14:15], s[82:83], s[82:83] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[16:17], s[84:85], s[84:85] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[18:19], s[86:87], s[86:87] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[20:21], s[88:89], s[88:89] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[22:23], s[90:91], s[90:91] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[24:25], s[92:93], s[92:93] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[26:27], s[94:95], s[94:95] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[28:29], s[96:97], s[96:97] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[30:31], s[98:99], s[98:99] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    s_and_saveexec_b64 s[70:71], s[96:97]
 ; GLOBALNESS1-NEXT:    s_cbranch_execz .LBB1_26
 ; GLOBALNESS1-NEXT:  ; %bb.10: ; %bb33.i
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_4 Depth=1
 ; GLOBALNESS1-NEXT:    global_load_dwordx2 v[0:1], v[32:33], off
-; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[44:45]
+; GLOBALNESS1-NEXT:    v_readlane_b32 s4, v41, 0
+; GLOBALNESS1-NEXT:    v_readlane_b32 s5, v41, 1
+; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[4:5]
 ; GLOBALNESS1-NEXT:    s_cbranch_vccnz .LBB1_12
 ; GLOBALNESS1-NEXT:  ; %bb.11: ; %bb39.i
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT:    v_mov_b32_e32 v43, v42
+; GLOBALNESS1-NEXT:    v_mov_b32_e32 v45, v44
 ; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[2:3], 0, 0
-; GLOBALNESS1-NEXT:    global_store_dwordx2 v[2:3], v[42:43], off
+; GLOBALNESS1-NEXT:    global_store_dwordx2 v[2:3], v[44:45], off
 ; GLOBALNESS1-NEXT:  .LBB1_12: ; %bb44.lr.ph.i
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v44
+; GLOBALNESS1-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v43
 ; GLOBALNESS1-NEXT:    v_cndmask_b32_e32 v2, 0, v40, vcc
+; GLOBALNESS1-NEXT:    s_mov_b64 s[72:73], s[42:43]
+; GLOBALNESS1-NEXT:    s_mov_b32 s75, s39
 ; GLOBALNESS1-NEXT:    s_waitcnt vmcnt(0)
-; GLOBALNESS1-NEXT:    v_cmp_nlt_f64_e64 s[54:55], 0, v[0:1]
-; GLOBALNESS1-NEXT:    v_cmp_eq_u32_e64 s[56:57], 0, v2
+; GLOBALNESS1-NEXT:    v_cmp_nlt_f64_e64 s[56:57], 0, v[0:1]
+; GLOBALNESS1-NEXT:    v_cmp_eq_u32_e64 s[58:59], 0, v2
 ; GLOBALNESS1-NEXT:    s_branch .LBB1_15
 ; GLOBALNESS1-NEXT:  .LBB1_13: ; %Flow7
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_15 Depth=2
 ; GLOBALNESS1-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GLOBALNESS1-NEXT:  .LBB1_14: ; %bb63.i
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_15 Depth=2
-; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[96:97]
+; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[86:87]
 ; GLOBALNESS1-NEXT:    s_cbranch_vccz .LBB1_25
 ; GLOBALNESS1-NEXT:  .LBB1_15: ; %bb44.i
 ; GLOBALNESS1-NEXT:    ; Parent Loop BB1_4 Depth=1
 ; GLOBALNESS1-NEXT:    ; => This Inner Loop Header: Depth=2
-; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[92:93]
+; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[94:95]
 ; GLOBALNESS1-NEXT:    s_cbranch_vccnz .LBB1_14
 ; GLOBALNESS1-NEXT:  ; %bb.16: ; %bb46.i
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_15 Depth=2
-; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[94:95]
+; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[88:89]
 ; GLOBALNESS1-NEXT:    s_cbranch_vccnz .LBB1_14
 ; GLOBALNESS1-NEXT:  ; %bb.17: ; %bb50.i
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_15 Depth=2
-; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[40:41]
+; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[36:37]
 ; GLOBALNESS1-NEXT:    s_cbranch_vccnz .LBB1_20
 ; GLOBALNESS1-NEXT:  ; %bb.18: ; %bb3.i.i
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_15 Depth=2
-; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[90:91]
+; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[40:41]
 ; GLOBALNESS1-NEXT:    s_cbranch_vccnz .LBB1_20
 ; GLOBALNESS1-NEXT:  ; %bb.19: ; %bb6.i.i
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_15 Depth=2
-; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[54:55]
+; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[56:57]
 ; GLOBALNESS1-NEXT:  .LBB1_20: ; %spam.exit.i
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_15 Depth=2
-; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[46:47]
+; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[90:91]
 ; GLOBALNESS1-NEXT:    s_cbranch_vccnz .LBB1_14
 ; GLOBALNESS1-NEXT:  ; %bb.21: ; %bb55.i
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_15 Depth=2
-; GLOBALNESS1-NEXT:    s_add_u32 s58, s38, 40
-; GLOBALNESS1-NEXT:    s_addc_u32 s59, s39, 0
-; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], s[36:37]
-; GLOBALNESS1-NEXT:    s_mov_b64 s[8:9], s[58:59]
+; GLOBALNESS1-NEXT:    s_add_u32 s60, s62, 40
+; GLOBALNESS1-NEXT:    s_addc_u32 s61, s63, 0
+; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], s[64:65]
+; GLOBALNESS1-NEXT:    s_mov_b64 s[6:7], s[54:55]
+; GLOBALNESS1-NEXT:    s_mov_b64 s[8:9], s[60:61]
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GLOBALNESS1-NEXT:    s_mov_b32 s12, s62
-; GLOBALNESS1-NEXT:    s_mov_b32 s13, s61
-; GLOBALNESS1-NEXT:    s_mov_b32 s14, s60
-; GLOBALNESS1-NEXT:    v_mov_b32_e32 v31, v41
-; GLOBALNESS1-NEXT:    s_swappc_b64 s[30:31], s[98:99]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[44:45], 0, 0
-; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], s[36:37]
-; GLOBALNESS1-NEXT:    s_mov_b64 s[8:9], s[58:59]
+; GLOBALNESS1-NEXT:    s_mov_b32 s12, s100
+; GLOBALNESS1-NEXT:    s_mov_b32 s13, s99
+; GLOBALNESS1-NEXT:    s_mov_b32 s14, s98
+; GLOBALNESS1-NEXT:    v_mov_b32_e32 v31, v42
+; GLOBALNESS1-NEXT:    s_swappc_b64 s[30:31], s[66:67]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[46:47], 0, 0
+; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], s[64:65]
+; GLOBALNESS1-NEXT:    s_mov_b64 s[6:7], s[54:55]
+; GLOBALNESS1-NEXT:    s_mov_b64 s[8:9], s[60:61]
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GLOBALNESS1-NEXT:    s_mov_b32 s12, s62
-; GLOBALNESS1-NEXT:    s_mov_b32 s13, s61
-; GLOBALNESS1-NEXT:    s_mov_b32 s14, s60
-; GLOBALNESS1-NEXT:    v_mov_b32_e32 v31, v41
-; GLOBALNESS1-NEXT:    global_store_dwordx2 v[44:45], a[32:33], off
-; GLOBALNESS1-NEXT:    s_swappc_b64 s[30:31], s[98:99]
-; GLOBALNESS1-NEXT:    s_and_saveexec_b64 s[4:5], s[56:57]
+; GLOBALNESS1-NEXT:    s_mov_b32 s12, s100
+; GLOBALNESS1-NEXT:    s_mov_b32 s13, s99
+; GLOBALNESS1-NEXT:    s_mov_b32 s14, s98
+; GLOBALNESS1-NEXT:    v_mov_b32_e32 v31, v42
+; GLOBALNESS1-NEXT:    global_store_dwordx2 v[46:47], a[32:33], off
+; GLOBALNESS1-NEXT:    s_swappc_b64 s[30:31], s[66:67]
+; GLOBALNESS1-NEXT:    s_and_saveexec_b64 s[4:5], s[58:59]
 ; GLOBALNESS1-NEXT:    s_cbranch_execz .LBB1_13
 ; GLOBALNESS1-NEXT:  ; %bb.22: ; %bb62.i
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_15 Depth=2
-; GLOBALNESS1-NEXT:    v_mov_b32_e32 v43, v42
-; GLOBALNESS1-NEXT:    global_store_dwordx2 v[44:45], v[42:43], off
+; GLOBALNESS1-NEXT:    v_mov_b32_e32 v45, v44
+; GLOBALNESS1-NEXT:    global_store_dwordx2 v[46:47], v[44:45], off
 ; GLOBALNESS1-NEXT:    s_branch .LBB1_13
 ; GLOBALNESS1-NEXT:  .LBB1_23: ; %LeafBlock
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT:    s_cmp_lg_u32 s87, 0
+; GLOBALNESS1-NEXT:    s_cmp_lg_u32 s39, 0
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], 0
 ; GLOBALNESS1-NEXT:    s_cselect_b64 s[6:7], -1, 0
 ; GLOBALNESS1-NEXT:    s_and_b64 vcc, exec, s[6:7]
@@ -297,152 +312,170 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
 ; GLOBALNESS1-NEXT:    s_branch .LBB1_3
 ; GLOBALNESS1-NEXT:  .LBB1_25: ; %Flow14
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT:    s_mov_b32 s68, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s69, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s70, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s71, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s72, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s73, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s74, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s75, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s76, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s77, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s78, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s79, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s80, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s81, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s82, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s83, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s84, s89
-; GLOBALNESS1-NEXT:    s_mov_b32 s85, s89
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[0:1], s[68:69], s[68:69] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[2:3], s[70:71], s[70:71] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[4:5], s[72:73], s[72:73] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[6:7], s[74:75], s[74:75] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[8:9], s[76:77], s[76:77] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[10:11], s[78:79], s[78:79] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[12:13], s[80:81], s[80:81] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[14:15], s[82:83], s[82:83] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[16:17], s[84:85], s[84:85] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[18:19], s[86:87], s[86:87] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[20:21], s[88:89], s[88:89] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[22:23], s[90:91], s[90:91] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[24:25], s[92:93], s[92:93] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[26:27], s[94:95], s[94:95] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[28:29], s[96:97], s[96:97] op_sel:[0,1]
-; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[30:31], s[98:99], s[98:99] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], s[36:37]
+; GLOBALNESS1-NEXT:    s_mov_b32 s36, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s37, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s38, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s39, s93
+; GLOBALNESS1-NEXT:    s_mov_b64 s[6:7], s[40:41]
+; GLOBALNESS1-NEXT:    s_mov_b32 s40, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s41, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s42, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s43, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s44, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s45, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s46, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s47, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s48, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s49, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s50, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s51, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s52, s93
+; GLOBALNESS1-NEXT:    s_mov_b32 s53, s93
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[0:1], s[36:37], s[36:37] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[2:3], s[38:39], s[38:39] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[4:5], s[40:41], s[40:41] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[6:7], s[42:43], s[42:43] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[8:9], s[44:45], s[44:45] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[10:11], s[46:47], s[46:47] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[12:13], s[48:49], s[48:49] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[14:15], s[50:51], s[50:51] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[16:17], s[52:53], s[52:53] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[18:19], s[54:55], s[54:55] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[20:21], s[56:57], s[56:57] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[22:23], s[58:59], s[58:59] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[24:25], s[60:61], s[60:61] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[26:27], s[62:63], s[62:63] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[28:29], s[64:65], s[64:65] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[30:31], s[66:67], s[66:67] op_sel:[0,1]
+; GLOBALNESS1-NEXT:    s_mov_b64 s[40:41], s[6:7]
+; GLOBALNESS1-NEXT:    s_mov_b64 s[36:37], s[4:5]
+; GLOBALNESS1-NEXT:    s_mov_b32 s39, s75
+; GLOBALNESS1-NEXT:    s_mov_b64 s[42:43], s[72:73]
 ; GLOBALNESS1-NEXT:  .LBB1_26: ; %Flow15
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT:    s_or_b64 exec, exec, s[66:67]
-; GLOBALNESS1-NEXT:    s_and_saveexec_b64 s[4:5], s[52:53]
+; GLOBALNESS1-NEXT:    s_or_b64 exec, exec, s[70:71]
+; GLOBALNESS1-NEXT:    s_and_saveexec_b64 s[4:5], s[96:97]
 ; GLOBALNESS1-NEXT:    s_cbranch_execz .LBB1_2
 ; GLOBALNESS1-NEXT:  ; %bb.27: ; %bb67.i
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[48:49]
+; GLOBALNESS1-NEXT:    v_readlane_b32 s6, v41, 2
+; GLOBALNESS1-NEXT:    v_readlane_b32 s7, v41, 3
+; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[6:7]
 ; GLOBALNESS1-NEXT:    s_cbranch_vccnz .LBB1_1
 ; GLOBALNESS1-NEXT:  ; %bb.28: ; %bb69.i
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT:    v_mov_b32_e32 v43, v42
+; GLOBALNESS1-NEXT:    v_mov_b32_e32 v45, v44
 ; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[32:33], 0, 0
-; GLOBALNESS1-NEXT:    global_store_dwordx2 v[32:33], v[42:43], off
+; GLOBALNESS1-NEXT:    global_store_dwordx2 v[32:33], v[44:45], off
 ; GLOBALNESS1-NEXT:    s_branch .LBB1_1
 ; GLOBALNESS1-NEXT:  .LBB1_29: ; %bb73.i
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT:    v_mov_b32_e32 v43, v42
+; GLOBALNESS1-NEXT:    v_mov_b32_e32 v45, v44
 ; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[32:33], 0, 0
-; GLOBALNESS1-NEXT:    global_store_dwordx2 v[32:33], v[42:43], off
+; GLOBALNESS1-NEXT:    global_store_dwordx2 v[32:33], v[44:45], off
 ; GLOBALNESS1-NEXT:    s_branch .LBB1_2
 ; GLOBALNESS1-NEXT:  .LBB1_30: ; %loop.exit.guard
 ; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[4:5]
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], -1
 ; GLOBALNESS1-NEXT:    s_cbranch_vccz .LBB1_32
 ; GLOBALNESS1-NEXT:  ; %bb.31: ; %bb7.i.i
-; GLOBALNESS1-NEXT:    s_add_u32 s8, s38, 40
-; GLOBALNESS1-NEXT:    s_addc_u32 s9, s39, 0
-; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], s[36:37]
+; GLOBALNESS1-NEXT:    s_add_u32 s8, s62, 40
+; GLOBALNESS1-NEXT:    s_addc_u32 s9, s63, 0
+; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], s[64:65]
+; GLOBALNESS1-NEXT:    s_mov_b64 s[6:7], s[54:55]
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GLOBALNESS1-NEXT:    s_mov_b32 s12, s62
-; GLOBALNESS1-NEXT:    s_mov_b32 s13, s61
-; GLOBALNESS1-NEXT:    s_mov_b32 s14, s60
-; GLOBALNESS1-NEXT:    v_mov_b32_e32 v31, v41
-; GLOBALNESS1-NEXT:    s_getpc_b64 s[6:7]
-; GLOBALNESS1-NEXT:    s_add_u32 s6, s6, widget at rel32@lo+4
-; GLOBALNESS1-NEXT:    s_addc_u32 s7, s7, widget at rel32@hi+12
-; GLOBALNESS1-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GLOBALNESS1-NEXT:    s_mov_b32 s12, s100
+; GLOBALNESS1-NEXT:    s_mov_b32 s13, s99
+; GLOBALNESS1-NEXT:    s_mov_b32 s14, s98
+; GLOBALNESS1-NEXT:    v_mov_b32_e32 v31, v42
+; GLOBALNESS1-NEXT:    s_getpc_b64 s[16:17]
+; GLOBALNESS1-NEXT:    s_add_u32 s16, s16, widget at rel32@lo+4
+; GLOBALNESS1-NEXT:    s_addc_u32 s17, s17, widget at rel32@hi+12
+; GLOBALNESS1-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], 0
 ; GLOBALNESS1-NEXT:  .LBB1_32: ; %Flow
 ; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[4:5]
 ; GLOBALNESS1-NEXT:    s_cbranch_vccnz .LBB1_34
 ; GLOBALNESS1-NEXT:  ; %bb.33: ; %bb11.i.i
-; GLOBALNESS1-NEXT:    s_add_u32 s8, s38, 40
-; GLOBALNESS1-NEXT:    s_addc_u32 s9, s39, 0
-; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], s[36:37]
+; GLOBALNESS1-NEXT:    s_add_u32 s8, s62, 40
+; GLOBALNESS1-NEXT:    s_addc_u32 s9, s63, 0
+; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], s[64:65]
+; GLOBALNESS1-NEXT:    s_mov_b64 s[6:7], s[54:55]
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GLOBALNESS1-NEXT:    s_mov_b32 s12, s62
-; GLOBALNESS1-NEXT:    s_mov_b32 s13, s61
-; GLOBALNESS1-NEXT:    s_mov_b32 s14, s60
-; GLOBALNESS1-NEXT:    v_mov_b32_e32 v31, v41
-; GLOBALNESS1-NEXT:    s_getpc_b64 s[6:7]
-; GLOBALNESS1-NEXT:    s_add_u32 s6, s6, widget at rel32@lo+4
-; GLOBALNESS1-NEXT:    s_addc_u32 s7, s7, widget at rel32@hi+12
-; GLOBALNESS1-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GLOBALNESS1-NEXT:    s_mov_b32 s12, s100
+; GLOBALNESS1-NEXT:    s_mov_b32 s13, s99
+; GLOBALNESS1-NEXT:    s_mov_b32 s14, s98
+; GLOBALNESS1-NEXT:    v_mov_b32_e32 v31, v42
+; GLOBALNESS1-NEXT:    s_getpc_b64 s[16:17]
+; GLOBALNESS1-NEXT:    s_add_u32 s16, s16, widget at rel32@lo+4
+; GLOBALNESS1-NEXT:    s_addc_u32 s17, s17, widget at rel32@hi+12
+; GLOBALNESS1-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GLOBALNESS1-NEXT:  .LBB1_34: ; %UnifiedUnreachableBlock
 ;
 ; GLOBALNESS0-LABEL: kernel:
 ; GLOBALNESS0:       ; %bb.0: ; %bb
-; GLOBALNESS0-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GLOBALNESS0-NEXT:    s_load_dwordx4 s[84:87], s[6:7], 0x0
-; GLOBALNESS0-NEXT:    s_nop 0
-; GLOBALNESS0-NEXT:    s_load_dword s6, s[6:7], 0x14
-; GLOBALNESS0-NEXT:    v_mov_b32_e32 v41, v0
-; GLOBALNESS0-NEXT:    v_mov_b32_e32 v42, 0
+; GLOBALNESS0-NEXT:    s_mov_b64 s[54:55], s[6:7]
+; GLOBALNESS0-NEXT:    s_load_dwordx4 s[36:39], s[8:9], 0x0
+; GLOBALNESS0-NEXT:    s_load_dword s6, s[8:9], 0x14
+; GLOBALNESS0-NEXT:    v_mov_b32_e32 v42, v0
+; GLOBALNESS0-NEXT:    v_mov_b32_e32 v44, 0
 ; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[0:1], 0, 0
-; GLOBALNESS0-NEXT:    global_store_dword v[0:1], v42, off
+; GLOBALNESS0-NEXT:    global_store_dword v[0:1], v44, off
 ; GLOBALNESS0-NEXT:    s_waitcnt lgkmcnt(0)
-; GLOBALNESS0-NEXT:    global_load_dword v0, v42, s[84:85]
-; GLOBALNESS0-NEXT:    s_add_u32 flat_scratch_lo, s10, s15
-; GLOBALNESS0-NEXT:    s_mov_b64 s[36:37], s[4:5]
-; GLOBALNESS0-NEXT:    s_load_dwordx2 s[4:5], s[38:39], 0x18
-; GLOBALNESS0-NEXT:    s_load_dword s7, s[38:39], 0x20
-; GLOBALNESS0-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
-; GLOBALNESS0-NEXT:    s_add_u32 s0, s0, s15
+; GLOBALNESS0-NEXT:    global_load_dword v0, v44, s[36:37]
+; GLOBALNESS0-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; GLOBALNESS0-NEXT:    s_mov_b64 s[62:63], s[4:5]
+; GLOBALNESS0-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x18
+; GLOBALNESS0-NEXT:    s_load_dword s7, s[8:9], 0x20
+; GLOBALNESS0-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; GLOBALNESS0-NEXT:    s_add_u32 s0, s0, s17
 ; GLOBALNESS0-NEXT:    s_addc_u32 s1, s1, 0
-; GLOBALNESS0-NEXT:    v_mov_b32_e32 v43, 0x40994400
-; GLOBALNESS0-NEXT:    s_bitcmp1_b32 s86, 0
+; GLOBALNESS0-NEXT:    v_mov_b32_e32 v45, 0x40994400
+; GLOBALNESS0-NEXT:    s_bitcmp1_b32 s38, 0
 ; GLOBALNESS0-NEXT:    s_waitcnt lgkmcnt(0)
-; GLOBALNESS0-NEXT:    v_cmp_ngt_f64_e64 s[40:41], s[4:5], v[42:43]
-; GLOBALNESS0-NEXT:    v_cmp_ngt_f64_e64 s[90:91], s[4:5], 0
+; GLOBALNESS0-NEXT:    v_cmp_ngt_f64_e64 s[36:37], s[4:5], v[44:45]
+; GLOBALNESS0-NEXT:    v_cmp_ngt_f64_e64 s[40:41], s[4:5], 0
 ; GLOBALNESS0-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; GLOBALNESS0-NEXT:    s_xor_b64 s[92:93], s[4:5], -1
+; GLOBALNESS0-NEXT:    s_xor_b64 s[94:95], s[4:5], -1
 ; GLOBALNESS0-NEXT:    s_bitcmp1_b32 s6, 0
 ; GLOBALNESS0-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
 ; GLOBALNESS0-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; GLOBALNESS0-NEXT:    s_xor_b64 s[94:95], s[4:5], -1
+; GLOBALNESS0-NEXT:    s_xor_b64 s[88:89], s[4:5], -1
 ; GLOBALNESS0-NEXT:    s_bitcmp1_b32 s7, 0
 ; GLOBALNESS0-NEXT:    s_cselect_b64 s[4:5], -1, 0
 ; GLOBALNESS0-NEXT:    s_getpc_b64 s[6:7]
 ; GLOBALNESS0-NEXT:    s_add_u32 s6, s6, wobble at gotpcrel32@lo+4
 ; GLOBALNESS0-NEXT:    s_addc_u32 s7, s7, wobble at gotpcrel32@hi+12
-; GLOBALNESS0-NEXT:    s_load_dwordx2 s[98:99], s[6:7], 0x0
-; GLOBALNESS0-NEXT:    s_mov_b32 s58, s14
-; GLOBALNESS0-NEXT:    s_mov_b32 s59, s13
-; GLOBALNESS0-NEXT:    s_mov_b32 s60, s12
-; GLOBALNESS0-NEXT:    s_mov_b64 s[34:35], s[8:9]
-; GLOBALNESS0-NEXT:    s_mov_b64 s[88:89], 0x80
+; GLOBALNESS0-NEXT:    s_xor_b64 s[86:87], s[4:5], -1
+; GLOBALNESS0-NEXT:    s_load_dwordx2 s[66:67], s[6:7], 0x0
+; GLOBALNESS0-NEXT:    s_mov_b32 s98, s16
+; GLOBALNESS0-NEXT:    s_mov_b64 s[60:61], s[8:9]
+; GLOBALNESS0-NEXT:    s_mov_b32 s99, s15
+; GLOBALNESS0-NEXT:    s_mov_b32 s100, s14
+; GLOBALNESS0-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GLOBALNESS0-NEXT:    s_mov_b64 s[92:93], 0x80
 ; GLOBALNESS0-NEXT:    v_cmp_ne_u32_e64 s[42:43], 1, v1
-; GLOBALNESS0-NEXT:    s_xor_b64 s[96:97], s[4:5], -1
-; GLOBALNESS0-NEXT:    s_mov_b32 s65, 0x3ff00000
+; GLOBALNESS0-NEXT:    s_mov_b32 s69, 0x3ff00000
 ; GLOBALNESS0-NEXT:    s_mov_b32 s32, 0
 ; GLOBALNESS0-NEXT:    ; implicit-def: $agpr32_agpr33_agpr34_agpr35_agpr36_agpr37_agpr38_agpr39_agpr40_agpr41_agpr42_agpr43_agpr44_agpr45_agpr46_agpr47_agpr48_agpr49_agpr50_agpr51_agpr52_agpr53_agpr54_agpr55_agpr56_agpr57_agpr58_agpr59_agpr60_agpr61_agpr62_agpr63
 ; GLOBALNESS0-NEXT:    s_waitcnt vmcnt(0)
-; GLOBALNESS0-NEXT:    v_cmp_gt_i32_e64 s[44:45], 0, v0
-; GLOBALNESS0-NEXT:    v_cmp_gt_i32_e64 s[46:47], 1, v0
-; GLOBALNESS0-NEXT:    v_cmp_eq_u32_e64 s[48:49], 1, v0
-; GLOBALNESS0-NEXT:    v_cmp_eq_u32_e64 s[50:51], 0, v0
+; GLOBALNESS0-NEXT:    v_cmp_gt_i32_e64 s[4:5], 0, v0
+; GLOBALNESS0-NEXT:    v_writelane_b32 v41, s4, 0
+; GLOBALNESS0-NEXT:    v_writelane_b32 v41, s5, 1
+; GLOBALNESS0-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v0
+; GLOBALNESS0-NEXT:    v_writelane_b32 v41, s4, 2
+; GLOBALNESS0-NEXT:    v_writelane_b32 v41, s5, 3
+; GLOBALNESS0-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GLOBALNESS0-NEXT:    v_writelane_b32 v41, s4, 4
+; GLOBALNESS0-NEXT:    v_cmp_gt_i32_e64 s[90:91], 1, v0
+; GLOBALNESS0-NEXT:    v_writelane_b32 v41, s5, 5
 ; GLOBALNESS0-NEXT:    s_branch .LBB1_4
 ; GLOBALNESS0-NEXT:  .LBB1_1: ; %bb70.i
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[50:51]
+; GLOBALNESS0-NEXT:    v_readlane_b32 s6, v41, 4
+; GLOBALNESS0-NEXT:    v_readlane_b32 s7, v41, 5
+; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[6:7]
 ; GLOBALNESS0-NEXT:    s_cbranch_vccz .LBB1_29
 ; GLOBALNESS0-NEXT:  .LBB1_2: ; %Flow6
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_4 Depth=1
@@ -488,31 +521,32 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
 ; GLOBALNESS0-NEXT:  .LBB1_4: ; %bb5
 ; GLOBALNESS0-NEXT:    ; =>This Loop Header: Depth=1
 ; GLOBALNESS0-NEXT:    ; Child Loop BB1_15 Depth 2
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[0:1], s[88:89], s[88:89] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[0:1], s[92:93], s[92:93] op_sel:[0,1]
 ; GLOBALNESS0-NEXT:    flat_load_dword v40, v[0:1]
-; GLOBALNESS0-NEXT:    s_add_u32 s8, s38, 40
-; GLOBALNESS0-NEXT:    buffer_store_dword v42, off, s[0:3], 0
-; GLOBALNESS0-NEXT:    flat_load_dword v44, v[0:1]
-; GLOBALNESS0-NEXT:    s_addc_u32 s9, s39, 0
-; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], s[36:37]
+; GLOBALNESS0-NEXT:    s_add_u32 s8, s60, 40
+; GLOBALNESS0-NEXT:    buffer_store_dword v44, off, s[0:3], 0
+; GLOBALNESS0-NEXT:    flat_load_dword v43, v[0:1]
+; GLOBALNESS0-NEXT:    s_addc_u32 s9, s61, 0
+; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], s[62:63]
+; GLOBALNESS0-NEXT:    s_mov_b64 s[6:7], s[54:55]
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GLOBALNESS0-NEXT:    s_mov_b32 s12, s60
-; GLOBALNESS0-NEXT:    s_mov_b32 s13, s59
-; GLOBALNESS0-NEXT:    s_mov_b32 s14, s58
-; GLOBALNESS0-NEXT:    v_mov_b32_e32 v31, v41
+; GLOBALNESS0-NEXT:    s_mov_b32 s12, s100
+; GLOBALNESS0-NEXT:    s_mov_b32 s13, s99
+; GLOBALNESS0-NEXT:    s_mov_b32 s14, s98
+; GLOBALNESS0-NEXT:    v_mov_b32_e32 v31, v42
 ; GLOBALNESS0-NEXT:    s_waitcnt lgkmcnt(0)
-; GLOBALNESS0-NEXT:    s_swappc_b64 s[30:31], s[98:99]
+; GLOBALNESS0-NEXT:    s_swappc_b64 s[30:31], s[66:67]
 ; GLOBALNESS0-NEXT:    s_and_b64 vcc, exec, s[42:43]
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[6:7], -1
 ; GLOBALNESS0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GLOBALNESS0-NEXT:    s_cbranch_vccnz .LBB1_8
 ; GLOBALNESS0-NEXT:  ; %bb.5: ; %NodeBlock
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT:    s_cmp_lt_i32 s87, 1
+; GLOBALNESS0-NEXT:    s_cmp_lt_i32 s39, 1
 ; GLOBALNESS0-NEXT:    s_cbranch_scc1 .LBB1_7
 ; GLOBALNESS0-NEXT:  ; %bb.6: ; %LeafBlock3
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT:    s_cmp_lg_u32 s87, 1
+; GLOBALNESS0-NEXT:    s_cmp_lg_u32 s39, 1
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], -1
 ; GLOBALNESS0-NEXT:    s_cselect_b64 s[6:7], -1, 0
 ; GLOBALNESS0-NEXT:    s_cbranch_execnz .LBB1_8
@@ -529,124 +563,130 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_4 Depth=1
 ; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[32:33], 0, 0
 ; GLOBALNESS0-NEXT:    flat_load_dword v0, v[32:33]
-; GLOBALNESS0-NEXT:    s_mov_b32 s64, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s66, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s67, s65
-; GLOBALNESS0-NEXT:    s_mov_b32 s68, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s69, s65
-; GLOBALNESS0-NEXT:    s_mov_b32 s70, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s71, s65
-; GLOBALNESS0-NEXT:    s_mov_b32 s72, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s73, s65
-; GLOBALNESS0-NEXT:    s_mov_b32 s74, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s75, s65
-; GLOBALNESS0-NEXT:    s_mov_b32 s76, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s77, s65
-; GLOBALNESS0-NEXT:    s_mov_b32 s78, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s79, s65
-; GLOBALNESS0-NEXT:    s_mov_b32 s80, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s81, s65
+; GLOBALNESS0-NEXT:    s_mov_b32 s68, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s70, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s71, s69
+; GLOBALNESS0-NEXT:    s_mov_b32 s72, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s73, s69
+; GLOBALNESS0-NEXT:    s_mov_b32 s74, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s75, s69
+; GLOBALNESS0-NEXT:    s_mov_b32 s76, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s77, s69
+; GLOBALNESS0-NEXT:    s_mov_b32 s78, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s79, s69
+; GLOBALNESS0-NEXT:    s_mov_b32 s80, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s81, s69
+; GLOBALNESS0-NEXT:    s_mov_b32 s82, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s83, s69
+; GLOBALNESS0-NEXT:    s_mov_b32 s84, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s85, s69
 ; GLOBALNESS0-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GLOBALNESS0-NEXT:    v_cmp_gt_i32_e64 s[52:53], 0, v0
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[0:1], s[64:65], s[64:65] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[2:3], s[66:67], s[66:67] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[4:5], s[68:69], s[68:69] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[6:7], s[70:71], s[70:71] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[8:9], s[72:73], s[72:73] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[10:11], s[74:75], s[74:75] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[12:13], s[76:77], s[76:77] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[14:15], s[78:79], s[78:79] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[16:17], s[80:81], s[80:81] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[18:19], s[82:83], s[82:83] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[20:21], s[84:85], s[84:85] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[22:23], s[86:87], s[86:87] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[24:25], s[88:89], s[88:89] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[26:27], s[90:91], s[90:91] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[28:29], s[92:93], s[92:93] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[30:31], s[94:95], s[94:95] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    s_and_saveexec_b64 s[66:67], s[52:53]
+; GLOBALNESS0-NEXT:    v_cmp_gt_i32_e64 s[96:97], 0, v0
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[0:1], s[68:69], s[68:69] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[2:3], s[70:71], s[70:71] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[4:5], s[72:73], s[72:73] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[6:7], s[74:75], s[74:75] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[8:9], s[76:77], s[76:77] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[10:11], s[78:79], s[78:79] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[12:13], s[80:81], s[80:81] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[14:15], s[82:83], s[82:83] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[16:17], s[84:85], s[84:85] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[18:19], s[86:87], s[86:87] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[20:21], s[88:89], s[88:89] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[22:23], s[90:91], s[90:91] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[24:25], s[92:93], s[92:93] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[26:27], s[94:95], s[94:95] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[28:29], s[96:97], s[96:97] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[30:31], s[98:99], s[98:99] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    s_and_saveexec_b64 s[70:71], s[96:97]
 ; GLOBALNESS0-NEXT:    s_cbranch_execz .LBB1_26
 ; GLOBALNESS0-NEXT:  ; %bb.10: ; %bb33.i
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_4 Depth=1
 ; GLOBALNESS0-NEXT:    global_load_dwordx2 v[0:1], v[32:33], off
-; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[44:45]
+; GLOBALNESS0-NEXT:    v_readlane_b32 s4, v41, 0
+; GLOBALNESS0-NEXT:    v_readlane_b32 s5, v41, 1
+; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[4:5]
 ; GLOBALNESS0-NEXT:    s_cbranch_vccnz .LBB1_12
 ; GLOBALNESS0-NEXT:  ; %bb.11: ; %bb39.i
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT:    v_mov_b32_e32 v43, v42
+; GLOBALNESS0-NEXT:    v_mov_b32_e32 v45, v44
 ; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[2:3], 0, 0
-; GLOBALNESS0-NEXT:    global_store_dwordx2 v[2:3], v[42:43], off
+; GLOBALNESS0-NEXT:    global_store_dwordx2 v[2:3], v[44:45], off
 ; GLOBALNESS0-NEXT:  .LBB1_12: ; %bb44.lr.ph.i
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v44
+; GLOBALNESS0-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v43
 ; GLOBALNESS0-NEXT:    v_cndmask_b32_e32 v2, 0, v40, vcc
+; GLOBALNESS0-NEXT:    s_mov_b64 s[72:73], s[42:43]
+; GLOBALNESS0-NEXT:    s_mov_b32 s75, s39
 ; GLOBALNESS0-NEXT:    s_waitcnt vmcnt(0)
-; GLOBALNESS0-NEXT:    v_cmp_nlt_f64_e64 s[54:55], 0, v[0:1]
-; GLOBALNESS0-NEXT:    v_cmp_eq_u32_e64 s[56:57], 0, v2
+; GLOBALNESS0-NEXT:    v_cmp_nlt_f64_e64 s[56:57], 0, v[0:1]
+; GLOBALNESS0-NEXT:    v_cmp_eq_u32_e64 s[58:59], 0, v2
 ; GLOBALNESS0-NEXT:    s_branch .LBB1_15
 ; GLOBALNESS0-NEXT:  .LBB1_13: ; %Flow7
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_15 Depth=2
 ; GLOBALNESS0-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GLOBALNESS0-NEXT:  .LBB1_14: ; %bb63.i
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_15 Depth=2
-; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[96:97]
+; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[86:87]
 ; GLOBALNESS0-NEXT:    s_cbranch_vccz .LBB1_25
 ; GLOBALNESS0-NEXT:  .LBB1_15: ; %bb44.i
 ; GLOBALNESS0-NEXT:    ; Parent Loop BB1_4 Depth=1
 ; GLOBALNESS0-NEXT:    ; => This Inner Loop Header: Depth=2
-; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[92:93]
+; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[94:95]
 ; GLOBALNESS0-NEXT:    s_cbranch_vccnz .LBB1_14
 ; GLOBALNESS0-NEXT:  ; %bb.16: ; %bb46.i
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_15 Depth=2
-; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[94:95]
+; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[88:89]
 ; GLOBALNESS0-NEXT:    s_cbranch_vccnz .LBB1_14
 ; GLOBALNESS0-NEXT:  ; %bb.17: ; %bb50.i
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_15 Depth=2
-; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[40:41]
+; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[36:37]
 ; GLOBALNESS0-NEXT:    s_cbranch_vccnz .LBB1_20
 ; GLOBALNESS0-NEXT:  ; %bb.18: ; %bb3.i.i
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_15 Depth=2
-; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[90:91]
+; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[40:41]
 ; GLOBALNESS0-NEXT:    s_cbranch_vccnz .LBB1_20
 ; GLOBALNESS0-NEXT:  ; %bb.19: ; %bb6.i.i
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_15 Depth=2
-; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[54:55]
+; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[56:57]
 ; GLOBALNESS0-NEXT:  .LBB1_20: ; %spam.exit.i
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_15 Depth=2
-; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[46:47]
+; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[90:91]
 ; GLOBALNESS0-NEXT:    s_cbranch_vccnz .LBB1_14
 ; GLOBALNESS0-NEXT:  ; %bb.21: ; %bb55.i
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_15 Depth=2
-; GLOBALNESS0-NEXT:    s_add_u32 s62, s38, 40
-; GLOBALNESS0-NEXT:    s_addc_u32 s63, s39, 0
-; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], s[36:37]
-; GLOBALNESS0-NEXT:    s_mov_b64 s[8:9], s[62:63]
+; GLOBALNESS0-NEXT:    s_add_u32 s64, s60, 40
+; GLOBALNESS0-NEXT:    s_addc_u32 s65, s61, 0
+; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], s[62:63]
+; GLOBALNESS0-NEXT:    s_mov_b64 s[6:7], s[54:55]
+; GLOBALNESS0-NEXT:    s_mov_b64 s[8:9], s[64:65]
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GLOBALNESS0-NEXT:    s_mov_b32 s12, s60
-; GLOBALNESS0-NEXT:    s_mov_b32 s13, s59
-; GLOBALNESS0-NEXT:    s_mov_b32 s14, s58
-; GLOBALNESS0-NEXT:    v_mov_b32_e32 v31, v41
-; GLOBALNESS0-NEXT:    s_swappc_b64 s[30:31], s[98:99]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[44:45], 0, 0
-; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], s[36:37]
-; GLOBALNESS0-NEXT:    s_mov_b64 s[8:9], s[62:63]
+; GLOBALNESS0-NEXT:    s_mov_b32 s12, s100
+; GLOBALNESS0-NEXT:    s_mov_b32 s13, s99
+; GLOBALNESS0-NEXT:    s_mov_b32 s14, s98
+; GLOBALNESS0-NEXT:    v_mov_b32_e32 v31, v42
+; GLOBALNESS0-NEXT:    s_swappc_b64 s[30:31], s[66:67]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[46:47], 0, 0
+; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], s[62:63]
+; GLOBALNESS0-NEXT:    s_mov_b64 s[6:7], s[54:55]
+; GLOBALNESS0-NEXT:    s_mov_b64 s[8:9], s[64:65]
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GLOBALNESS0-NEXT:    s_mov_b32 s12, s60
-; GLOBALNESS0-NEXT:    s_mov_b32 s13, s59
-; GLOBALNESS0-NEXT:    s_mov_b32 s14, s58
-; GLOBALNESS0-NEXT:    v_mov_b32_e32 v31, v41
-; GLOBALNESS0-NEXT:    global_store_dwordx2 v[44:45], a[32:33], off
-; GLOBALNESS0-NEXT:    s_swappc_b64 s[30:31], s[98:99]
-; GLOBALNESS0-NEXT:    s_and_saveexec_b64 s[4:5], s[56:57]
+; GLOBALNESS0-NEXT:    s_mov_b32 s12, s100
+; GLOBALNESS0-NEXT:    s_mov_b32 s13, s99
+; GLOBALNESS0-NEXT:    s_mov_b32 s14, s98
+; GLOBALNESS0-NEXT:    v_mov_b32_e32 v31, v42
+; GLOBALNESS0-NEXT:    global_store_dwordx2 v[46:47], a[32:33], off
+; GLOBALNESS0-NEXT:    s_swappc_b64 s[30:31], s[66:67]
+; GLOBALNESS0-NEXT:    s_and_saveexec_b64 s[4:5], s[58:59]
 ; GLOBALNESS0-NEXT:    s_cbranch_execz .LBB1_13
 ; GLOBALNESS0-NEXT:  ; %bb.22: ; %bb62.i
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_15 Depth=2
-; GLOBALNESS0-NEXT:    v_mov_b32_e32 v43, v42
-; GLOBALNESS0-NEXT:    global_store_dwordx2 v[44:45], v[42:43], off
+; GLOBALNESS0-NEXT:    v_mov_b32_e32 v45, v44
+; GLOBALNESS0-NEXT:    global_store_dwordx2 v[46:47], v[44:45], off
 ; GLOBALNESS0-NEXT:    s_branch .LBB1_13
 ; GLOBALNESS0-NEXT:  .LBB1_23: ; %LeafBlock
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT:    s_cmp_lg_u32 s87, 0
+; GLOBALNESS0-NEXT:    s_cmp_lg_u32 s39, 0
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], 0
 ; GLOBALNESS0-NEXT:    s_cselect_b64 s[6:7], -1, 0
 ; GLOBALNESS0-NEXT:    s_and_b64 vcc, exec, s[6:7]
@@ -657,95 +697,105 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i,
 ; GLOBALNESS0-NEXT:    s_branch .LBB1_3
 ; GLOBALNESS0-NEXT:  .LBB1_25: ; %Flow14
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT:    s_mov_b32 s68, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s69, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s70, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s71, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s72, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s73, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s74, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s75, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s76, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s77, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s78, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s79, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s80, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s81, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s82, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s83, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s84, s89
-; GLOBALNESS0-NEXT:    s_mov_b32 s85, s89
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[0:1], s[68:69], s[68:69] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[2:3], s[70:71], s[70:71] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[4:5], s[72:73], s[72:73] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[6:7], s[74:75], s[74:75] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[8:9], s[76:77], s[76:77] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[10:11], s[78:79], s[78:79] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[12:13], s[80:81], s[80:81] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[14:15], s[82:83], s[82:83] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[16:17], s[84:85], s[84:85] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[18:19], s[86:87], s[86:87] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[20:21], s[88:89], s[88:89] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[22:23], s[90:91], s[90:91] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[24:25], s[92:93], s[92:93] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[26:27], s[94:95], s[94:95] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[28:29], s[96:97], s[96:97] op_sel:[0,1]
-; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[30:31], s[98:99], s[98:99] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], s[36:37]
+; GLOBALNESS0-NEXT:    s_mov_b32 s36, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s37, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s38, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s39, s93
+; GLOBALNESS0-NEXT:    s_mov_b64 s[6:7], s[40:41]
+; GLOBALNESS0-NEXT:    s_mov_b32 s40, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s41, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s42, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s43, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s44, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s45, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s46, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s47, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s48, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s49, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s50, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s51, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s52, s93
+; GLOBALNESS0-NEXT:    s_mov_b32 s53, s93
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[0:1], s[36:37], s[36:37] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[2:3], s[38:39], s[38:39] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[4:5], s[40:41], s[40:41] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[6:7], s[42:43], s[42:43] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[8:9], s[44:45], s[44:45] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[10:11], s[46:47], s[46:47] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[12:13], s[48:49], s[48:49] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[14:15], s[50:51], s[50:51] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[16:17], s[52:53], s[52:53] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[18:19], s[54:55], s[54:55] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[20:21], s[56:57], s[56:57] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[22:23], s[58:59], s[58:59] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[24:25], s[60:61], s[60:61] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[26:27], s[62:63], s[62:63] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[28:29], s[64:65], s[64:65] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[30:31], s[66:67], s[66:67] op_sel:[0,1]
+; GLOBALNESS0-NEXT:    s_mov_b64 s[40:41], s[6:7]
+; GLOBALNESS0-NEXT:    s_mov_b64 s[36:37], s[4:5]
+; GLOBALNESS0-NEXT:    s_mov_b32 s39, s75
+; GLOBALNESS0-NEXT:    s_mov_b64 s[42:43], s[72:73]
 ; GLOBALNESS0-NEXT:  .LBB1_26: ; %Flow15
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT:    s_or_b64 exec, exec, s[66:67]
-; GLOBALNESS0-NEXT:    s_and_saveexec_b64 s[4:5], s[52:53]
+; GLOBALNESS0-NEXT:    s_or_b64 exec, exec, s[70:71]
+; GLOBALNESS0-NEXT:    s_and_saveexec_b64 s[4:5], s[96:97]
 ; GLOBALNESS0-NEXT:    s_cbranch_execz .LBB1_2
 ; GLOBALNESS0-NEXT:  ; %bb.27: ; %bb67.i
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[48:49]
+; GLOBALNESS0-NEXT:    v_readlane_b32 s6, v41, 2
+; GLOBALNESS0-NEXT:    v_readlane_b32 s7, v41, 3
+; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[6:7]
 ; GLOBALNESS0-NEXT:    s_cbranch_vccnz .LBB1_1
 ; GLOBALNESS0-NEXT:  ; %bb.28: ; %bb69.i
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT:    v_mov_b32_e32 v43, v42
+; GLOBALNESS0-NEXT:    v_mov_b32_e32 v45, v44
 ; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[32:33], 0, 0
-; GLOBALNESS0-NEXT:    global_store_dwordx2 v[32:33], v[42:43], off
+; GLOBALNESS0-NEXT:    global_store_dwordx2 v[32:33], v[44:45], off
 ; GLOBALNESS0-NEXT:    s_branch .LBB1_1
 ; GLOBALNESS0-NEXT:  .LBB1_29: ; %bb73.i
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT:    v_mov_b32_e32 v43, v42
+; GLOBALNESS0-NEXT:    v_mov_b32_e32 v45, v44
 ; GLOBALNESS0-NEXT:    v_pk_mov_b32 v[32:33], 0, 0
-; GLOBALNESS0-NEXT:    global_store_dwordx2 v[32:33], v[42:43], off
+; GLOBALNESS0-NEXT:    global_store_dwordx2 v[32:33], v[44:45], off
 ; GLOBALNESS0-NEXT:    s_branch .LBB1_2
 ; GLOBALNESS0-NEXT:  .LBB1_30: ; %loop.exit.guard
 ; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[4:5]
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], -1
 ; GLOBALNESS0-NEXT:    s_cbranch_vccz .LBB1_32
 ; GLOBALNESS0-NEXT:  ; %bb.31: ; %bb7.i.i
-; GLOBALNESS0-NEXT:    s_add_u32 s8, s38, 40
-; GLOBALNESS0-NEXT:    s_addc_u32 s9, s39, 0
-; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], s[36:37]
+; GLOBALNESS0-NEXT:    s_add_u32 s8, s60, 40
+; GLOBALNESS0-NEXT:    s_addc_u32 s9, s61, 0
+; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], s[62:63]
+; GLOBALNESS0-NEXT:    s_mov_b64 s[6:7], s[54:55]
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GLOBALNESS0-NEXT:    s_mov_b32 s12, s60
-; GLOBALNESS0-NEXT:    s_mov_b32 s13, s59
-; GLOBALNESS0-NEXT:    s_mov_b32 s14, s58
-; GLOBALNESS0-NEXT:    v_mov_b32_e32 v31, v41
-; GLOBALNESS0-NEXT:    s_getpc_b64 s[6:7]
-; GLOBALNESS0-NEXT:    s_add_u32 s6, s6, widget at rel32@lo+4
-; GLOBALNESS0-NEXT:    s_addc_u32 s7, s7, widget at rel32@hi+12
-; GLOBALNESS0-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GLOBALNESS0-NEXT:    s_mov_b32 s12, s100
+; GLOBALNESS0-NEXT:    s_mov_b32 s13, s99
+; GLOBALNESS0-NEXT:    s_mov_b32 s14, s98
+; GLOBALNESS0-NEXT:    v_mov_b32_e32 v31, v42
+; GLOBALNESS0-NEXT:    s_getpc_b64 s[16:17]
+; GLOBALNESS0-NEXT:    s_add_u32 s16, s16, widget at rel32@lo+4
+; GLOBALNESS0-NEXT:    s_addc_u32 s17, s17, widget at rel32@hi+12
+; GLOBALNESS0-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], 0
 ; GLOBALNESS0-NEXT:  .LBB1_32: ; %Flow
 ; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[4:5]
 ; GLOBALNESS0-NEXT:    s_cbranch_vccnz .LBB1_34
 ; GLOBALNESS0-NEXT:  ; %bb.33: ; %bb11.i.i
-; GLOBALNESS0-NEXT:    s_add_u32 s8, s38, 40
-; GLOBALNESS0-NEXT:    s_addc_u32 s9, s39, 0
-; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], s[36:37]
+; GLOBALNESS0-NEXT:    s_add_u32 s8, s60, 40
+; GLOBALNESS0-NEXT:    s_addc_u32 s9, s61, 0
+; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], s[62:63]
+; GLOBALNESS0-NEXT:    s_mov_b64 s[6:7], s[54:55]
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GLOBALNESS0-NEXT:    s_mov_b32 s12, s60
-; GLOBALNESS0-NEXT:    s_mov_b32 s13, s59
-; GLOBALNESS0-NEXT:    s_mov_b32 s14, s58
-; GLOBALNESS0-NEXT:    v_mov_b32_e32 v31, v41
-; GLOBALNESS0-NEXT:    s_getpc_b64 s[6:7]
-; GLOBALNESS0-NEXT:    s_add_u32 s6, s6, widget at rel32@lo+4
-; GLOBALNESS0-NEXT:    s_addc_u32 s7, s7, widget at rel32@hi+12
-; GLOBALNESS0-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GLOBALNESS0-NEXT:    s_mov_b32 s12, s100
+; GLOBALNESS0-NEXT:    s_mov_b32 s13, s99
+; GLOBALNESS0-NEXT:    s_mov_b32 s14, s98
+; GLOBALNESS0-NEXT:    v_mov_b32_e32 v31, v42
+; GLOBALNESS0-NEXT:    s_getpc_b64 s[16:17]
+; GLOBALNESS0-NEXT:    s_add_u32 s16, s16, widget at rel32@lo+4
+; GLOBALNESS0-NEXT:    s_addc_u32 s17, s17, widget at rel32@hi+12
+; GLOBALNESS0-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GLOBALNESS0-NEXT:  .LBB1_34: ; %UnifiedUnreachableBlock
 bb:
   store i32 0, i32 addrspace(1)* null, align 4

diff  --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
index f15c05b37db45..48e7db518fea6 100644
--- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
+++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
@@ -290,14 +290,14 @@ define hidden void @blam() {
 ; GCN-NEXT:    v_writelane_b32 v40, s47, 15
 ; GCN-NEXT:    v_writelane_b32 v40, s48, 16
 ; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    s_mov_b64 s[34:35], s[6:7]
 ; GCN-NEXT:    v_mov_b32_e32 v41, v31
 ; GCN-NEXT:    s_mov_b32 s44, s15
 ; GCN-NEXT:    s_mov_b32 s45, s14
 ; GCN-NEXT:    s_mov_b32 s46, s13
 ; GCN-NEXT:    s_mov_b32 s47, s12
-; GCN-NEXT:    s_mov_b64 s[36:37], s[10:11]
-; GCN-NEXT:    s_mov_b64 s[38:39], s[8:9]
+; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
 ; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
 ; GCN-NEXT:    s_mov_b64 s[4:5], 0
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
@@ -354,9 +354,9 @@ define hidden void @blam() {
 ; GCN-NEXT:    ; in Loop: Header=BB1_4 Depth=2
 ; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GCN-NEXT:    s_mov_b64 s[6:7], s[34:35]
-; GCN-NEXT:    s_mov_b64 s[8:9], s[38:39]
-; GCN-NEXT:    s_mov_b64 s[10:11], s[36:37]
+; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
 ; GCN-NEXT:    s_mov_b32 s12, s47
 ; GCN-NEXT:    s_mov_b32 s13, s46
 ; GCN-NEXT:    s_mov_b32 s14, s45

diff  --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
index 27e9c02966d6b..eb33071dc1ae7 100644
--- a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
@@ -265,7 +265,7 @@ SerializeToHsacoPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) {
 
     // This constant must always match the default code object ABI version
     // of the AMDGPU backend.
-    addControlConstant("__oclc_ABI_version", 500, 32);
+    addControlConstant("__oclc_ABI_version", 400, 32);
   }
 
   // Determine libraries we need to link - order matters due to dependencies


        


More information about the cfe-commits mailing list