[llvm] acd1399 - [GlobalISel] Re-generate some call lowering tests with the new CHECK-NEXT behaviour.

Amara Emerson via llvm-commits <llvm-commits@lists.llvm.org>
Sun Sep 26 17:51:07 PDT 2021


Author: Amara Emerson
Date: 2021-09-26T17:25:38-07:00
New Revision: acd13994d17fe64269f71324840bd45031a2e552

URL: https://github.com/llvm/llvm-project/commit/acd13994d17fe64269f71324840bd45031a2e552
DIFF: https://github.com/llvm/llvm-project/commit/acd13994d17fe64269f71324840bd45031a2e552.diff

LOG: [GlobalISel] Re-generate some call lowering tests with the new CHECK-NEXT behaviour.
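
For reference, the behaviour the regenerated checks rely on: a plain
"; CHECK:" directive lets FileCheck skip any number of intervening lines
before finding its match, whereas "; CHECK-NEXT:" requires its match on
the line immediately after the previous match. A minimal sketch of the
difference (illustrative only, not taken from the diff below):

    ; CHECK:      [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
    ; CHECK-NEXT: $w0 = COPY [[C]](s32)
    ; CHECK-NEXT: RET_ReallyLR

The first line may match anywhere after the preceding match, but the two
CHECK-NEXT lines must match consecutive output lines, so an unexpected
instruction emitted between the COPY and the RET now fails the test. The
regenerated output also inserts "; CHECK-NEXT: {{  $}}" after each
"liveins:" list; that regex effectively matches the blank separator line
MIR prints there, keeping the CHECK-NEXT chain unbroken across it.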

Added: 
    

Modified: 
    llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll
    llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll
    llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
    llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-arguments.ll
    llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-s128-div.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
    llvm/test/CodeGen/Mips/GlobalISel/irtranslator/extend_args.ll
    llvm/test/CodeGen/Mips/GlobalISel/irtranslator/float_args.ll
    llvm/test/CodeGen/Mips/GlobalISel/irtranslator/stack_args.ll
    llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll
index 0340ab94357e9..1caef7c8e8772 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll
@@ -8,42 +8,42 @@ declare void @varargs(i32, double, i64, ...)
 define void @test_varargs() {
   ; CHECK-LABEL: name: test_varargs
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
-  ; CHECK:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-  ; CHECK:   [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
-  ; CHECK:   [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
-  ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
-  ; CHECK:   [[C7:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
-  ; CHECK:   ADJCALLSTACKDOWN 40, 0, implicit-def $sp, implicit $sp
-  ; CHECK:   $w0 = COPY [[C]](s32)
-  ; CHECK:   $d0 = COPY [[C1]](s64)
-  ; CHECK:   $x1 = COPY [[C2]](s64)
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C3]](s8)
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $sp
-  ; CHECK:   [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
-  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ANYEXT]](s32)
-  ; CHECK:   G_STORE [[ANYEXT1]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 1)
-  ; CHECK:   [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[C4]](s16)
-  ; CHECK:   [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
-  ; CHECK:   [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ANYEXT2]](s32)
-  ; CHECK:   G_STORE [[ANYEXT3]](s64), [[PTR_ADD1]](p0) :: (store (s64) into stack + 8, align 1)
-  ; CHECK:   [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-  ; CHECK:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64)
-  ; CHECK:   [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[C5]](s32)
-  ; CHECK:   G_STORE [[ANYEXT4]](s64), [[PTR_ADD2]](p0) :: (store (s64) into stack + 16, align 1)
-  ; CHECK:   [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
-  ; CHECK:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64)
-  ; CHECK:   G_STORE [[C6]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 24, align 1)
-  ; CHECK:   [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-  ; CHECK:   [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64)
-  ; CHECK:   G_STORE [[C7]](s64), [[PTR_ADD4]](p0) :: (store (s64) into stack + 32, align 1)
-  ; CHECK:   BL @varargs, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
-  ; CHECK:   ADJCALLSTACKUP 40, 0, implicit-def $sp, implicit $sp
-  ; CHECK:   RET_ReallyLR
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 40, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   $w0 = COPY [[C]](s32)
+  ; CHECK-NEXT:   $d0 = COPY [[C1]](s64)
+  ; CHECK-NEXT:   $x1 = COPY [[C2]](s64)
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C3]](s8)
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $sp
+  ; CHECK-NEXT:   [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
+  ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ANYEXT]](s32)
+  ; CHECK-NEXT:   G_STORE [[ANYEXT1]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 1)
+  ; CHECK-NEXT:   [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[C4]](s16)
+  ; CHECK-NEXT:   [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
+  ; CHECK-NEXT:   [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ANYEXT2]](s32)
+  ; CHECK-NEXT:   G_STORE [[ANYEXT3]](s64), [[PTR_ADD1]](p0) :: (store (s64) into stack + 8, align 1)
+  ; CHECK-NEXT:   [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64)
+  ; CHECK-NEXT:   [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[C5]](s32)
+  ; CHECK-NEXT:   G_STORE [[ANYEXT4]](s64), [[PTR_ADD2]](p0) :: (store (s64) into stack + 16, align 1)
+  ; CHECK-NEXT:   [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64)
+  ; CHECK-NEXT:   G_STORE [[C6]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 24, align 1)
+  ; CHECK-NEXT:   [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+  ; CHECK-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64)
+  ; CHECK-NEXT:   G_STORE [[C7]](s64), [[PTR_ADD4]](p0) :: (store (s64) into stack + 32, align 1)
+  ; CHECK-NEXT:   BL @varargs, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
+  ; CHECK-NEXT:   ADJCALLSTACKUP 40, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   RET_ReallyLR
   call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12, i8 3, i16 1, i32 4, float 1.0, double 2.0)
   ret void
 }
@@ -53,48 +53,48 @@ declare i64 @i8i16callee(i64 %a1, i64 %a2, i64 %a3, i8 signext %a4, i16 signext
 define i32 @i8i16caller() nounwind readnone {
   ; CHECK-LABEL: name: i8i16caller
   ; CHECK: bb.1.entry:
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-  ; CHECK:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-  ; CHECK:   [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
-  ; CHECK:   [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
-  ; CHECK:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-  ; CHECK:   [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-  ; CHECK:   [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-  ; CHECK:   [[C8:%[0-9]+]]:_(s8) = G_CONSTANT i8 97
-  ; CHECK:   [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 98
-  ; CHECK:   [[C10:%[0-9]+]]:_(s8) = G_CONSTANT i8 99
-  ; CHECK:   [[C11:%[0-9]+]]:_(s8) = G_CONSTANT i8 100
-  ; CHECK:   ADJCALLSTACKDOWN 6, 0, implicit-def $sp, implicit $sp
-  ; CHECK:   $x0 = COPY [[C]](s64)
-  ; CHECK:   $x1 = COPY [[C1]](s64)
-  ; CHECK:   $x2 = COPY [[C2]](s64)
-  ; CHECK:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C3]](s8)
-  ; CHECK:   $w3 = COPY [[SEXT]](s32)
-  ; CHECK:   [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[C4]](s16)
-  ; CHECK:   $w4 = COPY [[SEXT1]](s32)
-  ; CHECK:   $x5 = COPY [[C5]](s64)
-  ; CHECK:   $x6 = COPY [[C6]](s64)
-  ; CHECK:   $x7 = COPY [[C7]](s64)
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $sp
-  ; CHECK:   [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64)
-  ; CHECK:   G_STORE [[C8]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
-  ; CHECK:   [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C13]](s64)
-  ; CHECK:   G_STORE [[C9]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 2, align 1)
-  ; CHECK:   [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C14]](s64)
-  ; CHECK:   G_STORE [[C10]](s8), [[PTR_ADD2]](p0) :: (store (s8) into stack + 4)
-  ; CHECK:   [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-  ; CHECK:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C15]](s64)
-  ; CHECK:   G_STORE [[C11]](s8), [[PTR_ADD3]](p0) :: (store (s8) into stack + 5)
-  ; CHECK:   BL @i8i16callee, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $w3, implicit $w4, implicit $x5, implicit $x6, implicit $x7, implicit-def $x0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
-  ; CHECK:   ADJCALLSTACKUP 6, 0, implicit-def $sp, implicit $sp
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-  ; CHECK:   $w0 = COPY [[TRUNC]](s32)
-  ; CHECK:   RET_ReallyLR implicit $w0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+  ; CHECK-NEXT:   [[C8:%[0-9]+]]:_(s8) = G_CONSTANT i8 97
+  ; CHECK-NEXT:   [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 98
+  ; CHECK-NEXT:   [[C10:%[0-9]+]]:_(s8) = G_CONSTANT i8 99
+  ; CHECK-NEXT:   [[C11:%[0-9]+]]:_(s8) = G_CONSTANT i8 100
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 6, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   $x0 = COPY [[C]](s64)
+  ; CHECK-NEXT:   $x1 = COPY [[C1]](s64)
+  ; CHECK-NEXT:   $x2 = COPY [[C2]](s64)
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C3]](s8)
+  ; CHECK-NEXT:   $w3 = COPY [[SEXT]](s32)
+  ; CHECK-NEXT:   [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[C4]](s16)
+  ; CHECK-NEXT:   $w4 = COPY [[SEXT1]](s32)
+  ; CHECK-NEXT:   $x5 = COPY [[C5]](s64)
+  ; CHECK-NEXT:   $x6 = COPY [[C6]](s64)
+  ; CHECK-NEXT:   $x7 = COPY [[C7]](s64)
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $sp
+  ; CHECK-NEXT:   [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64)
+  ; CHECK-NEXT:   G_STORE [[C8]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
+  ; CHECK-NEXT:   [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C13]](s64)
+  ; CHECK-NEXT:   G_STORE [[C9]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 2, align 1)
+  ; CHECK-NEXT:   [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C14]](s64)
+  ; CHECK-NEXT:   G_STORE [[C10]](s8), [[PTR_ADD2]](p0) :: (store (s8) into stack + 4)
+  ; CHECK-NEXT:   [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C15]](s64)
+  ; CHECK-NEXT:   G_STORE [[C11]](s8), [[PTR_ADD3]](p0) :: (store (s8) into stack + 5)
+  ; CHECK-NEXT:   BL @i8i16callee, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $w3, implicit $w4, implicit $x5, implicit $x6, implicit $x7, implicit-def $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
+  ; CHECK-NEXT:   ADJCALLSTACKUP 6, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+  ; CHECK-NEXT:   $w0 = COPY [[TRUNC]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
 entry:
   %call = tail call i64 @i8i16callee(i64 0, i64 1, i64 2, i8 signext 3, i16 signext 4, i64 5, i64 6, i64 7, i8 97, i16  98, i8  99, i8  100)
   %conv = trunc i64 %call to i32

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll
index 4b0cf5a8b8ba8..dbe1e36b2a3fb 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll
@@ -7,17 +7,18 @@ target triple = "aarch64-linux-gnu"
 define i32 @args_i32(i32 %w0, i32 %w1, i32 %w2, i32 %w3,
   ; CHECK-LABEL: name: args_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $w0, $w1, $w2, $w3, $w4, $w5, $w6, $w7
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $w4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $w5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $w6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $w7
-  ; CHECK:   $w0 = COPY [[COPY]](s32)
-  ; CHECK:   RET_ReallyLR implicit $w0
+  ; CHECK-NEXT:   liveins: $w0, $w1, $w2, $w3, $w4, $w5, $w6, $w7
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $w4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $w5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $w6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $w7
+  ; CHECK-NEXT:   $w0 = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
                      i32 %w4, i32 %w5, i32 %w6, i32 %w7) {
   ret i32 %w0
 }
@@ -25,17 +26,18 @@ define i32 @args_i32(i32 %w0, i32 %w1, i32 %w2, i32 %w3,
 define i64 @args_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3,
   ; CHECK-LABEL: name: args_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s64) = COPY $x5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s64) = COPY $x6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s64) = COPY $x7
-  ; CHECK:   $x0 = COPY [[COPY]](s64)
-  ; CHECK:   RET_ReallyLR implicit $x0
+  ; CHECK-NEXT:   liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s64) = COPY $x5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s64) = COPY $x6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s64) = COPY $x7
+  ; CHECK-NEXT:   $x0 = COPY [[COPY]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
                      i64 %x4, i64 %x5, i64 %x6, i64 %x7) {
   ret i64 %x0
 }
@@ -44,17 +46,18 @@ define i64 @args_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3,
 define i8* @args_ptrs(i8* %x0, i16* %x1, <2 x i8>* %x2, {i8, i16, i32}* %x3,
   ; CHECK-LABEL: name: args_ptrs
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(p0) = COPY $x3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(p0) = COPY $x4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(p0) = COPY $x5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(p0) = COPY $x6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(p0) = COPY $x7
-  ; CHECK:   $x0 = COPY [[COPY]](p0)
-  ; CHECK:   RET_ReallyLR implicit $x0
+  ; CHECK-NEXT:   liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(p0) = COPY $x3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(p0) = COPY $x4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(p0) = COPY $x5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(p0) = COPY $x6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(p0) = COPY $x7
+  ; CHECK-NEXT:   $x0 = COPY [[COPY]](p0)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
                       [3 x float]* %x4, double* %x5, i8* %x6, i8* %x7) {
   ret i8* %x0
 }
@@ -62,10 +65,11 @@ define i8* @args_ptrs(i8* %x0, i16* %x1, <2 x i8>* %x2, {i8, i16, i32}* %x3,
 define [1 x double] @args_arr([1 x double] %d0) {
   ; CHECK-LABEL: name: args_arr
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $d0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
-  ; CHECK:   $d0 = COPY [[COPY]](s64)
-  ; CHECK:   RET_ReallyLR implicit $d0
+  ; CHECK-NEXT:   liveins: $d0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+  ; CHECK-NEXT:   $d0 = COPY [[COPY]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0
   ret [1 x double] %d0
 }
 
@@ -73,28 +77,28 @@ declare void @varargs(i32, double, i64, ...)
 define void @test_varargs() {
   ; CHECK-LABEL: name: test_varargs
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
-  ; CHECK:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-  ; CHECK:   [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
-  ; CHECK:   [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
-  ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
-  ; CHECK:   [[C7:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
-  ; CHECK:   $w0 = COPY [[C]](s32)
-  ; CHECK:   $d0 = COPY [[C1]](s64)
-  ; CHECK:   $x1 = COPY [[C2]](s64)
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C3]](s8)
-  ; CHECK:   $w2 = COPY [[ANYEXT]](s32)
-  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[C4]](s16)
-  ; CHECK:   $w3 = COPY [[ANYEXT1]](s32)
-  ; CHECK:   $w4 = COPY [[C5]](s32)
-  ; CHECK:   $s1 = COPY [[C6]](s32)
-  ; CHECK:   $d2 = COPY [[C7]](s64)
-  ; CHECK:   BL @varargs, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1, implicit $w2, implicit $w3, implicit $w4, implicit $s1, implicit $d2
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
-  ; CHECK:   RET_ReallyLR
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   $w0 = COPY [[C]](s32)
+  ; CHECK-NEXT:   $d0 = COPY [[C1]](s64)
+  ; CHECK-NEXT:   $x1 = COPY [[C2]](s64)
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C3]](s8)
+  ; CHECK-NEXT:   $w2 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[C4]](s16)
+  ; CHECK-NEXT:   $w3 = COPY [[ANYEXT1]](s32)
+  ; CHECK-NEXT:   $w4 = COPY [[C5]](s32)
+  ; CHECK-NEXT:   $s1 = COPY [[C6]](s32)
+  ; CHECK-NEXT:   $d2 = COPY [[C7]](s64)
+  ; CHECK-NEXT:   BL @varargs, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1, implicit $w2, implicit $w3, implicit $w4, implicit $s1, implicit $d2
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   RET_ReallyLR
   call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12, i8 3, i16 1, i32 4, float 1.0, double 2.0)
   ret void
 }
@@ -107,24 +111,24 @@ declare void @stack_ext_needed([8 x i64], i8 signext %in)
 define void @test_stack_ext_needed() {
   ; CHECK-LABEL: name: test_stack_ext_needed
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-  ; CHECK:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 42
-  ; CHECK:   ADJCALLSTACKDOWN 8, 0, implicit-def $sp, implicit $sp
-  ; CHECK:   $x0 = COPY [[DEF]](s64)
-  ; CHECK:   $x1 = COPY [[DEF]](s64)
-  ; CHECK:   $x2 = COPY [[DEF]](s64)
-  ; CHECK:   $x3 = COPY [[DEF]](s64)
-  ; CHECK:   $x4 = COPY [[DEF]](s64)
-  ; CHECK:   $x5 = COPY [[DEF]](s64)
-  ; CHECK:   $x6 = COPY [[DEF]](s64)
-  ; CHECK:   $x7 = COPY [[DEF]](s64)
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $sp
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; CHECK:   G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
-  ; CHECK:   BL @stack_ext_needed, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7
-  ; CHECK:   ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp
-  ; CHECK:   RET_ReallyLR
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 42
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 8, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   $x0 = COPY [[DEF]](s64)
+  ; CHECK-NEXT:   $x1 = COPY [[DEF]](s64)
+  ; CHECK-NEXT:   $x2 = COPY [[DEF]](s64)
+  ; CHECK-NEXT:   $x3 = COPY [[DEF]](s64)
+  ; CHECK-NEXT:   $x4 = COPY [[DEF]](s64)
+  ; CHECK-NEXT:   $x5 = COPY [[DEF]](s64)
+  ; CHECK-NEXT:   $x6 = COPY [[DEF]](s64)
+  ; CHECK-NEXT:   $x7 = COPY [[DEF]](s64)
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $sp
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; CHECK-NEXT:   G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
+  ; CHECK-NEXT:   BL @stack_ext_needed, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7
+  ; CHECK-NEXT:   ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   RET_ReallyLR
   call void @stack_ext_needed([8 x i64] undef, i8 signext 42)
   ret void
 }
@@ -133,16 +137,17 @@ define void @test_stack_ext_needed() {
 define void @callee_s128(i128 %a, i128 %b, i128 *%ptr) {
   ; CHECK-LABEL: name: callee_s128
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $x0, $x1, $x2, $x3, $x4
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
-  ; CHECK:   [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
-  ; CHECK:   [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY2]](s64), [[COPY3]](s64)
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(p0) = COPY $x4
-  ; CHECK:   G_STORE [[MV1]](s128), [[COPY4]](p0) :: (store (s128) into %ir.ptr)
-  ; CHECK:   RET_ReallyLR
+  ; CHECK-NEXT:   liveins: $x0, $x1, $x2, $x3, $x4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY2]](s64), [[COPY3]](s64)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(p0) = COPY $x4
+  ; CHECK-NEXT:   G_STORE [[MV1]](s128), [[COPY4]](p0) :: (store (s128) into %ir.ptr)
+  ; CHECK-NEXT:   RET_ReallyLR
   store i128 %b, i128 *%ptr
   ret void
 }
@@ -151,20 +156,21 @@ define void @callee_s128(i128 %a, i128 %b, i128 *%ptr) {
 define void @caller_s128(i128 *%ptr) {
   ; CHECK-LABEL: name: caller_s128
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $x0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128) from %ir.ptr)
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
-  ; CHECK:   [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128)
-  ; CHECK:   $x0 = COPY [[UV]](s64)
-  ; CHECK:   $x1 = COPY [[UV1]](s64)
-  ; CHECK:   [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128)
-  ; CHECK:   $x2 = COPY [[UV2]](s64)
-  ; CHECK:   $x3 = COPY [[UV3]](s64)
-  ; CHECK:   $x4 = COPY [[COPY]](p0)
-  ; CHECK:   BL @callee_s128, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
-  ; CHECK:   RET_ReallyLR
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128) from %ir.ptr)
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128)
+  ; CHECK-NEXT:   $x0 = COPY [[UV]](s64)
+  ; CHECK-NEXT:   $x1 = COPY [[UV1]](s64)
+  ; CHECK-NEXT:   [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128)
+  ; CHECK-NEXT:   $x2 = COPY [[UV2]](s64)
+  ; CHECK-NEXT:   $x3 = COPY [[UV3]](s64)
+  ; CHECK-NEXT:   $x4 = COPY [[COPY]](p0)
+  ; CHECK-NEXT:   BL @callee_s128, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   RET_ReallyLR
   %v = load i128, i128 *%ptr
   call void @callee_s128(i128 %v, i128 %v, i128 *%ptr)
   ret void
@@ -176,48 +182,48 @@ declare i64 @i8i16callee(i64 %a1, i64 %a2, i64 %a3, i8 signext %a4, i16 signext
 define i32 @i8i16caller() nounwind readnone {
   ; CHECK-LABEL: name: i8i16caller
   ; CHECK: bb.1.entry:
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-  ; CHECK:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-  ; CHECK:   [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
-  ; CHECK:   [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
-  ; CHECK:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-  ; CHECK:   [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-  ; CHECK:   [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-  ; CHECK:   [[C8:%[0-9]+]]:_(s8) = G_CONSTANT i8 97
-  ; CHECK:   [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 98
-  ; CHECK:   [[C10:%[0-9]+]]:_(s8) = G_CONSTANT i8 99
-  ; CHECK:   [[C11:%[0-9]+]]:_(s8) = G_CONSTANT i8 100
-  ; CHECK:   ADJCALLSTACKDOWN 32, 0, implicit-def $sp, implicit $sp
-  ; CHECK:   $x0 = COPY [[C]](s64)
-  ; CHECK:   $x1 = COPY [[C1]](s64)
-  ; CHECK:   $x2 = COPY [[C2]](s64)
-  ; CHECK:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C3]](s8)
-  ; CHECK:   $w3 = COPY [[SEXT]](s32)
-  ; CHECK:   [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[C4]](s16)
-  ; CHECK:   $w4 = COPY [[SEXT1]](s32)
-  ; CHECK:   $x5 = COPY [[C5]](s64)
-  ; CHECK:   $x6 = COPY [[C6]](s64)
-  ; CHECK:   $x7 = COPY [[C7]](s64)
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $sp
-  ; CHECK:   [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64)
-  ; CHECK:   G_STORE [[C8]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
-  ; CHECK:   [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C13]](s64)
-  ; CHECK:   G_STORE [[C9]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 8, align 1)
-  ; CHECK:   [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-  ; CHECK:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C14]](s64)
-  ; CHECK:   G_STORE [[C10]](s8), [[PTR_ADD2]](p0) :: (store (s8) into stack + 16)
-  ; CHECK:   [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
-  ; CHECK:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C15]](s64)
-  ; CHECK:   G_STORE [[C11]](s8), [[PTR_ADD3]](p0) :: (store (s8) into stack + 24)
-  ; CHECK:   BL @i8i16callee, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $w3, implicit $w4, implicit $x5, implicit $x6, implicit $x7, implicit-def $x0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
-  ; CHECK:   ADJCALLSTACKUP 32, 0, implicit-def $sp, implicit $sp
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-  ; CHECK:   $w0 = COPY [[TRUNC]](s32)
-  ; CHECK:   RET_ReallyLR implicit $w0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+  ; CHECK-NEXT:   [[C8:%[0-9]+]]:_(s8) = G_CONSTANT i8 97
+  ; CHECK-NEXT:   [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 98
+  ; CHECK-NEXT:   [[C10:%[0-9]+]]:_(s8) = G_CONSTANT i8 99
+  ; CHECK-NEXT:   [[C11:%[0-9]+]]:_(s8) = G_CONSTANT i8 100
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 32, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   $x0 = COPY [[C]](s64)
+  ; CHECK-NEXT:   $x1 = COPY [[C1]](s64)
+  ; CHECK-NEXT:   $x2 = COPY [[C2]](s64)
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C3]](s8)
+  ; CHECK-NEXT:   $w3 = COPY [[SEXT]](s32)
+  ; CHECK-NEXT:   [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[C4]](s16)
+  ; CHECK-NEXT:   $w4 = COPY [[SEXT1]](s32)
+  ; CHECK-NEXT:   $x5 = COPY [[C5]](s64)
+  ; CHECK-NEXT:   $x6 = COPY [[C6]](s64)
+  ; CHECK-NEXT:   $x7 = COPY [[C7]](s64)
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $sp
+  ; CHECK-NEXT:   [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64)
+  ; CHECK-NEXT:   G_STORE [[C8]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
+  ; CHECK-NEXT:   [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C13]](s64)
+  ; CHECK-NEXT:   G_STORE [[C9]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 8, align 1)
+  ; CHECK-NEXT:   [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C14]](s64)
+  ; CHECK-NEXT:   G_STORE [[C10]](s8), [[PTR_ADD2]](p0) :: (store (s8) into stack + 16)
+  ; CHECK-NEXT:   [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C15]](s64)
+  ; CHECK-NEXT:   G_STORE [[C11]](s8), [[PTR_ADD3]](p0) :: (store (s8) into stack + 24)
+  ; CHECK-NEXT:   BL @i8i16callee, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $w3, implicit $w4, implicit $x5, implicit $x6, implicit $x7, implicit-def $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
+  ; CHECK-NEXT:   ADJCALLSTACKUP 32, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+  ; CHECK-NEXT:   $w0 = COPY [[TRUNC]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
 entry:
   %call = tail call i64 @i8i16callee(i64 0, i64 1, i64 2, i8 signext 3, i16 signext 4, i64 5, i64 6, i64 7, i8 97, i16  98, i8  99, i8  100)
   %conv = trunc i64 %call to i32
@@ -227,11 +233,12 @@ entry:
 define void @arg_v2i64(<2 x i64> %arg) {
   ; CHECK-LABEL: name: arg_v2i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $q0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[COPY]](<2 x s64>), [[DEF]](p0) :: (store (<2 x s64>) into `<2 x i64>* undef`)
-  ; CHECK:   RET_ReallyLR
+  ; CHECK-NEXT:   liveins: $q0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[COPY]](<2 x s64>), [[DEF]](p0) :: (store (<2 x s64>) into `<2 x i64>* undef`)
+  ; CHECK-NEXT:   RET_ReallyLR
   store <2 x i64> %arg, <2 x i64>* undef
   ret void
 }
@@ -239,15 +246,16 @@ define void @arg_v2i64(<2 x i64> %arg) {
 define void @arg_v8i64(<8 x i64> %arg) {
   ; CHECK-LABEL: name: arg_v8i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $q0, $q1, $q2, $q3
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
-  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s64>) = G_CONCAT_VECTORS [[COPY]](<2 x s64>), [[COPY1]](<2 x s64>), [[COPY2]](<2 x s64>), [[COPY3]](<2 x s64>)
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[CONCAT_VECTORS]](<8 x s64>), [[DEF]](p0) :: (store (<8 x s64>) into `<8 x i64>* undef`)
-  ; CHECK:   RET_ReallyLR
+  ; CHECK-NEXT:   liveins: $q0, $q1, $q2, $q3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s64>) = G_CONCAT_VECTORS [[COPY]](<2 x s64>), [[COPY1]](<2 x s64>), [[COPY2]](<2 x s64>), [[COPY3]](<2 x s64>)
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[CONCAT_VECTORS]](<8 x s64>), [[DEF]](p0) :: (store (<8 x s64>) into `<8 x i64>* undef`)
+  ; CHECK-NEXT:   RET_ReallyLR
   store <8 x i64> %arg, <8 x i64>* undef
   ret void
 }
@@ -255,12 +263,13 @@ define void @arg_v8i64(<8 x i64> %arg) {
 define void @arg_v4f32(<4 x float> %arg) {
   ; CHECK-LABEL: name: arg_v4f32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $q0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
-  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BITCAST]](<4 x s32>), [[DEF]](p0) :: (store (<4 x s32>) into `<4 x float>* undef`)
-  ; CHECK:   RET_ReallyLR
+  ; CHECK-NEXT:   liveins: $q0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; CHECK-NEXT:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BITCAST]](<4 x s32>), [[DEF]](p0) :: (store (<4 x s32>) into `<4 x float>* undef`)
+  ; CHECK-NEXT:   RET_ReallyLR
   store <4 x float> %arg, <4 x float>* undef
   ret void
 }
@@ -268,19 +277,20 @@ define void @arg_v4f32(<4 x float> %arg) {
 define void @ret_arg_v16f32(<16 x float> %arg) {
   ; CHECK-LABEL: name: ret_arg_v16f32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $q0, $q1, $q2, $q3
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
-  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
-  ; CHECK:   [[BITCAST1:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
-  ; CHECK:   [[BITCAST2:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY2]](<2 x s64>)
-  ; CHECK:   [[BITCAST3:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY3]](<2 x s64>)
-  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[BITCAST]](<4 x s32>), [[BITCAST1]](<4 x s32>), [[BITCAST2]](<4 x s32>), [[BITCAST3]](<4 x s32>)
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[CONCAT_VECTORS]](<16 x s32>), [[DEF]](p0) :: (store (<16 x s32>) into `<16 x float>* undef`)
-  ; CHECK:   RET_ReallyLR
+  ; CHECK-NEXT:   liveins: $q0, $q1, $q2, $q3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
+  ; CHECK-NEXT:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+  ; CHECK-NEXT:   [[BITCAST1:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
+  ; CHECK-NEXT:   [[BITCAST2:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY2]](<2 x s64>)
+  ; CHECK-NEXT:   [[BITCAST3:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY3]](<2 x s64>)
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[BITCAST]](<4 x s32>), [[BITCAST1]](<4 x s32>), [[BITCAST2]](<4 x s32>), [[BITCAST3]](<4 x s32>)
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[CONCAT_VECTORS]](<16 x s32>), [[DEF]](p0) :: (store (<16 x s32>) into `<16 x float>* undef`)
+  ; CHECK-NEXT:   RET_ReallyLR
   store <16 x float> %arg, <16 x float>* undef
   ret void
 }

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
index a0ba7a0d570b0..63af1ba433ee9 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
@@ -6,10 +6,10 @@ declare void @simple_fn()
 define void @tail_call() {
   ; DARWIN-LABEL: name: tail_call
   ; DARWIN: bb.1 (%ir-block.0):
-  ; DARWIN:   TCRETURNdi @simple_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp
+  ; DARWIN-NEXT:   TCRETURNdi @simple_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp
   ; WINDOWS-LABEL: name: tail_call
   ; WINDOWS: bb.1 (%ir-block.0):
-  ; WINDOWS:   TCRETURNdi @simple_fn, 0, csr_aarch64_aapcs, implicit $sp
+  ; WINDOWS-NEXT:   TCRETURNdi @simple_fn, 0, csr_aarch64_aapcs, implicit $sp
   tail call void @simple_fn()
   ret void
 }
@@ -19,14 +19,16 @@ define void @tail_call() {
 define void @indirect_tail_call(void()* %func) {
   ; DARWIN-LABEL: name: indirect_tail_call
   ; DARWIN: bb.1 (%ir-block.0):
-  ; DARWIN:   liveins: $x0
-  ; DARWIN:   [[COPY:%[0-9]+]]:tcgpr64(p0) = COPY $x0
-  ; DARWIN:   TCRETURNri [[COPY]](p0), 0, csr_darwin_aarch64_aapcs, implicit $sp
+  ; DARWIN-NEXT:   liveins: $x0
+  ; DARWIN-NEXT: {{  $}}
+  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:tcgpr64(p0) = COPY $x0
+  ; DARWIN-NEXT:   TCRETURNri [[COPY]](p0), 0, csr_darwin_aarch64_aapcs, implicit $sp
   ; WINDOWS-LABEL: name: indirect_tail_call
   ; WINDOWS: bb.1 (%ir-block.0):
-  ; WINDOWS:   liveins: $x0
-  ; WINDOWS:   [[COPY:%[0-9]+]]:tcgpr64(p0) = COPY $x0
-  ; WINDOWS:   TCRETURNri [[COPY]](p0), 0, csr_aarch64_aapcs, implicit $sp
+  ; WINDOWS-NEXT:   liveins: $x0
+  ; WINDOWS-NEXT: {{  $}}
+  ; WINDOWS-NEXT:   [[COPY:%[0-9]+]]:tcgpr64(p0) = COPY $x0
+  ; WINDOWS-NEXT:   TCRETURNri [[COPY]](p0), 0, csr_aarch64_aapcs, implicit $sp
   tail call void %func()
   ret void
 }
@@ -35,16 +37,18 @@ declare void @outgoing_args_fn(i32)
 define void @test_outgoing_args(i32 %a) {
   ; DARWIN-LABEL: name: test_outgoing_args
   ; DARWIN: bb.1 (%ir-block.0):
-  ; DARWIN:   liveins: $w0
-  ; DARWIN:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
-  ; DARWIN:   $w0 = COPY [[COPY]](s32)
-  ; DARWIN:   TCRETURNdi @outgoing_args_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $w0
+  ; DARWIN-NEXT:   liveins: $w0
+  ; DARWIN-NEXT: {{  $}}
+  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; DARWIN-NEXT:   $w0 = COPY [[COPY]](s32)
+  ; DARWIN-NEXT:   TCRETURNdi @outgoing_args_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $w0
   ; WINDOWS-LABEL: name: test_outgoing_args
   ; WINDOWS: bb.1 (%ir-block.0):
-  ; WINDOWS:   liveins: $w0
-  ; WINDOWS:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
-  ; WINDOWS:   $w0 = COPY [[COPY]](s32)
-  ; WINDOWS:   TCRETURNdi @outgoing_args_fn, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0
+  ; WINDOWS-NEXT:   liveins: $w0
+  ; WINDOWS-NEXT: {{  $}}
+  ; WINDOWS-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; WINDOWS-NEXT:   $w0 = COPY [[COPY]](s32)
+  ; WINDOWS-NEXT:   TCRETURNdi @outgoing_args_fn, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0
   tail call void @outgoing_args_fn(i32 %a)
   ret void
 }
@@ -56,34 +60,36 @@ declare void @outgoing_stack_args_fn(<4 x half>)
 define void @test_outgoing_stack_args([8 x <2 x double>], <4 x half> %arg) {
   ; DARWIN-LABEL: name: test_outgoing_stack_args
   ; DARWIN: bb.1 (%ir-block.1):
-  ; DARWIN:   liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
-  ; DARWIN:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
-  ; DARWIN:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
-  ; DARWIN:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
-  ; DARWIN:   [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
-  ; DARWIN:   [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
-  ; DARWIN:   [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
-  ; DARWIN:   [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
-  ; DARWIN:   [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
-  ; DARWIN:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; DARWIN:   [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
-  ; DARWIN:   $d0 = COPY [[LOAD]](<4 x s16>)
-  ; DARWIN:   TCRETURNdi @outgoing_stack_args_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $d0
+  ; DARWIN-NEXT:   liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
+  ; DARWIN-NEXT: {{  $}}
+  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; DARWIN-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+  ; DARWIN-NEXT:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
+  ; DARWIN-NEXT:   [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
+  ; DARWIN-NEXT:   [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
+  ; DARWIN-NEXT:   [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
+  ; DARWIN-NEXT:   [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
+  ; DARWIN-NEXT:   [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
+  ; DARWIN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; DARWIN-NEXT:   [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
+  ; DARWIN-NEXT:   $d0 = COPY [[LOAD]](<4 x s16>)
+  ; DARWIN-NEXT:   TCRETURNdi @outgoing_stack_args_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $d0
   ; WINDOWS-LABEL: name: test_outgoing_stack_args
   ; WINDOWS: bb.1 (%ir-block.1):
-  ; WINDOWS:   liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
-  ; WINDOWS:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
-  ; WINDOWS:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
-  ; WINDOWS:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
-  ; WINDOWS:   [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
-  ; WINDOWS:   [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
-  ; WINDOWS:   [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
-  ; WINDOWS:   [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
-  ; WINDOWS:   [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
-  ; WINDOWS:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; WINDOWS:   [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
-  ; WINDOWS:   $d0 = COPY [[LOAD]](<4 x s16>)
-  ; WINDOWS:   TCRETURNdi @outgoing_stack_args_fn, 0, csr_aarch64_aapcs, implicit $sp, implicit $d0
+  ; WINDOWS-NEXT:   liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
+  ; WINDOWS-NEXT: {{  $}}
+  ; WINDOWS-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; WINDOWS-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+  ; WINDOWS-NEXT:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
+  ; WINDOWS-NEXT:   [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
+  ; WINDOWS-NEXT:   [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
+  ; WINDOWS-NEXT:   [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
+  ; WINDOWS-NEXT:   [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
+  ; WINDOWS-NEXT:   [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
+  ; WINDOWS-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; WINDOWS-NEXT:   [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
+  ; WINDOWS-NEXT:   $d0 = COPY [[LOAD]](<4 x s16>)
+  ; WINDOWS-NEXT:   TCRETURNdi @outgoing_stack_args_fn, 0, csr_aarch64_aapcs, implicit $sp, implicit $d0
   tail call void @outgoing_stack_args_fn(<4 x half> %arg)
   ret void
 }
@@ -94,56 +100,56 @@ declare i32 @too_big_stack(i64 %x0, i64 %x1, i64 %x2, i64 %x3, i64 %x4, i64 %x5,
 define i32 @test_too_big_stack() {
   ; DARWIN-LABEL: name: test_too_big_stack
   ; DARWIN: bb.1.entry:
-  ; DARWIN:   [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-  ; DARWIN:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 8
-  ; DARWIN:   [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
-  ; DARWIN:   ADJCALLSTACKDOWN 4, 0, implicit-def $sp, implicit $sp
-  ; DARWIN:   $x0 = COPY [[DEF]](s64)
-  ; DARWIN:   $x1 = COPY [[DEF]](s64)
-  ; DARWIN:   $x2 = COPY [[DEF]](s64)
-  ; DARWIN:   $x3 = COPY [[DEF]](s64)
-  ; DARWIN:   $x4 = COPY [[DEF]](s64)
-  ; DARWIN:   $x5 = COPY [[DEF]](s64)
-  ; DARWIN:   $x6 = COPY [[DEF]](s64)
-  ; DARWIN:   $x7 = COPY [[DEF]](s64)
-  ; DARWIN:   [[COPY:%[0-9]+]]:_(p0) = COPY $sp
-  ; DARWIN:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; DARWIN:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-  ; DARWIN:   G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
-  ; DARWIN:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-  ; DARWIN:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-  ; DARWIN:   G_STORE [[C1]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 2, align 1)
-  ; DARWIN:   BL @too_big_stack, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit-def $w0
-  ; DARWIN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
-  ; DARWIN:   ADJCALLSTACKUP 4, 0, implicit-def $sp, implicit $sp
-  ; DARWIN:   $w0 = COPY [[COPY1]](s32)
-  ; DARWIN:   RET_ReallyLR implicit $w0
+  ; DARWIN-NEXT:   [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+  ; DARWIN-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 8
+  ; DARWIN-NEXT:   [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
+  ; DARWIN-NEXT:   ADJCALLSTACKDOWN 4, 0, implicit-def $sp, implicit $sp
+  ; DARWIN-NEXT:   $x0 = COPY [[DEF]](s64)
+  ; DARWIN-NEXT:   $x1 = COPY [[DEF]](s64)
+  ; DARWIN-NEXT:   $x2 = COPY [[DEF]](s64)
+  ; DARWIN-NEXT:   $x3 = COPY [[DEF]](s64)
+  ; DARWIN-NEXT:   $x4 = COPY [[DEF]](s64)
+  ; DARWIN-NEXT:   $x5 = COPY [[DEF]](s64)
+  ; DARWIN-NEXT:   $x6 = COPY [[DEF]](s64)
+  ; DARWIN-NEXT:   $x7 = COPY [[DEF]](s64)
+  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $sp
+  ; DARWIN-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; DARWIN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; DARWIN-NEXT:   G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
+  ; DARWIN-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+  ; DARWIN-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+  ; DARWIN-NEXT:   G_STORE [[C1]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 2, align 1)
+  ; DARWIN-NEXT:   BL @too_big_stack, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit-def $w0
+  ; DARWIN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
+  ; DARWIN-NEXT:   ADJCALLSTACKUP 4, 0, implicit-def $sp, implicit $sp
+  ; DARWIN-NEXT:   $w0 = COPY [[COPY1]](s32)
+  ; DARWIN-NEXT:   RET_ReallyLR implicit $w0
   ; WINDOWS-LABEL: name: test_too_big_stack
   ; WINDOWS: bb.1.entry:
-  ; WINDOWS:   [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-  ; WINDOWS:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 8
-  ; WINDOWS:   [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
-  ; WINDOWS:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
-  ; WINDOWS:   $x0 = COPY [[DEF]](s64)
-  ; WINDOWS:   $x1 = COPY [[DEF]](s64)
-  ; WINDOWS:   $x2 = COPY [[DEF]](s64)
-  ; WINDOWS:   $x3 = COPY [[DEF]](s64)
-  ; WINDOWS:   $x4 = COPY [[DEF]](s64)
-  ; WINDOWS:   $x5 = COPY [[DEF]](s64)
-  ; WINDOWS:   $x6 = COPY [[DEF]](s64)
-  ; WINDOWS:   $x7 = COPY [[DEF]](s64)
-  ; WINDOWS:   [[COPY:%[0-9]+]]:_(p0) = COPY $sp
-  ; WINDOWS:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; WINDOWS:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-  ; WINDOWS:   G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
-  ; WINDOWS:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; WINDOWS:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-  ; WINDOWS:   G_STORE [[C1]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 8, align 1)
-  ; WINDOWS:   BL @too_big_stack, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit-def $w0
-  ; WINDOWS:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
-  ; WINDOWS:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
-  ; WINDOWS:   $w0 = COPY [[COPY1]](s32)
-  ; WINDOWS:   RET_ReallyLR implicit $w0
+  ; WINDOWS-NEXT:   [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+  ; WINDOWS-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 8
+  ; WINDOWS-NEXT:   [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
+  ; WINDOWS-NEXT:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+  ; WINDOWS-NEXT:   $x0 = COPY [[DEF]](s64)
+  ; WINDOWS-NEXT:   $x1 = COPY [[DEF]](s64)
+  ; WINDOWS-NEXT:   $x2 = COPY [[DEF]](s64)
+  ; WINDOWS-NEXT:   $x3 = COPY [[DEF]](s64)
+  ; WINDOWS-NEXT:   $x4 = COPY [[DEF]](s64)
+  ; WINDOWS-NEXT:   $x5 = COPY [[DEF]](s64)
+  ; WINDOWS-NEXT:   $x6 = COPY [[DEF]](s64)
+  ; WINDOWS-NEXT:   $x7 = COPY [[DEF]](s64)
+  ; WINDOWS-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $sp
+  ; WINDOWS-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; WINDOWS-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; WINDOWS-NEXT:   G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
+  ; WINDOWS-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; WINDOWS-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+  ; WINDOWS-NEXT:   G_STORE [[C1]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 8, align 1)
+  ; WINDOWS-NEXT:   BL @too_big_stack, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit-def $w0
+  ; WINDOWS-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
+  ; WINDOWS-NEXT:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+  ; WINDOWS-NEXT:   $w0 = COPY [[COPY1]](s32)
+  ; WINDOWS-NEXT:   RET_ReallyLR implicit $w0
 entry:
   %call = tail call i32 @too_big_stack(i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i8 8, i16 9)
   ret i32 %call
@@ -156,10 +162,10 @@ declare i32 @nonvoid_ret()
 define i32 @test_nonvoid_ret() {
   ; DARWIN-LABEL: name: test_nonvoid_ret
   ; DARWIN: bb.1 (%ir-block.0):
-  ; DARWIN:   TCRETURNdi @nonvoid_ret, 0, csr_darwin_aarch64_aapcs, implicit $sp
+  ; DARWIN-NEXT:   TCRETURNdi @nonvoid_ret, 0, csr_darwin_aarch64_aapcs, implicit $sp
   ; WINDOWS-LABEL: name: test_nonvoid_ret
   ; WINDOWS: bb.1 (%ir-block.0):
-  ; WINDOWS:   TCRETURNdi @nonvoid_ret, 0, csr_aarch64_aapcs, implicit $sp
+  ; WINDOWS-NEXT:   TCRETURNdi @nonvoid_ret, 0, csr_aarch64_aapcs, implicit $sp
   %call = tail call i32 @nonvoid_ret()
   ret i32 %call
 }
@@ -168,22 +174,22 @@ declare void @varargs(i32, double, i64, ...)
 define void @test_varargs() {
   ; DARWIN-LABEL: name: test_varargs
   ; DARWIN: bb.1 (%ir-block.0):
-  ; DARWIN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; DARWIN:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
-  ; DARWIN:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-  ; DARWIN:   $w0 = COPY [[C]](s32)
-  ; DARWIN:   $d0 = COPY [[C1]](s64)
-  ; DARWIN:   $x1 = COPY [[C2]](s64)
-  ; DARWIN:   TCRETURNdi @varargs, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1
+  ; DARWIN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; DARWIN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+  ; DARWIN-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; DARWIN-NEXT:   $w0 = COPY [[C]](s32)
+  ; DARWIN-NEXT:   $d0 = COPY [[C1]](s64)
+  ; DARWIN-NEXT:   $x1 = COPY [[C2]](s64)
+  ; DARWIN-NEXT:   TCRETURNdi @varargs, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1
   ; WINDOWS-LABEL: name: test_varargs
   ; WINDOWS: bb.1 (%ir-block.0):
-  ; WINDOWS:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; WINDOWS:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
-  ; WINDOWS:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-  ; WINDOWS:   $w0 = COPY [[C]](s32)
-  ; WINDOWS:   $x1 = COPY [[C1]](s64)
-  ; WINDOWS:   $x2 = COPY [[C2]](s64)
-  ; WINDOWS:   TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $x1, implicit $x2
+  ; WINDOWS-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; WINDOWS-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+  ; WINDOWS-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; WINDOWS-NEXT:   $w0 = COPY [[C]](s32)
+  ; WINDOWS-NEXT:   $x1 = COPY [[C1]](s64)
+  ; WINDOWS-NEXT:   $x2 = COPY [[C2]](s64)
+  ; WINDOWS-NEXT:   TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $x1, implicit $x2
   tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12)
   ret void
 }
@@ -195,32 +201,32 @@ define void @test_varargs_2() {
 
   ; DARWIN-LABEL: name: test_varargs_2
   ; DARWIN: bb.1 (%ir-block.0):
-  ; DARWIN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; DARWIN:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
-  ; DARWIN:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-  ; DARWIN:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
-  ; DARWIN:   ADJCALLSTACKDOWN 8, 0, implicit-def $sp, implicit $sp
-  ; DARWIN:   $w0 = COPY [[C]](s32)
-  ; DARWIN:   $d0 = COPY [[C1]](s64)
-  ; DARWIN:   $x1 = COPY [[C2]](s64)
-  ; DARWIN:   [[COPY:%[0-9]+]]:_(p0) = COPY $sp
-  ; DARWIN:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; DARWIN:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-  ; DARWIN:   G_STORE [[C3]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 1)
-  ; DARWIN:   BL @varargs, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
-  ; DARWIN:   ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp
-  ; DARWIN:   RET_ReallyLR
+  ; DARWIN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; DARWIN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+  ; DARWIN-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; DARWIN-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
+  ; DARWIN-NEXT:   ADJCALLSTACKDOWN 8, 0, implicit-def $sp, implicit $sp
+  ; DARWIN-NEXT:   $w0 = COPY [[C]](s32)
+  ; DARWIN-NEXT:   $d0 = COPY [[C1]](s64)
+  ; DARWIN-NEXT:   $x1 = COPY [[C2]](s64)
+  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $sp
+  ; DARWIN-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; DARWIN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+  ; DARWIN-NEXT:   G_STORE [[C3]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 1)
+  ; DARWIN-NEXT:   BL @varargs, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
+  ; DARWIN-NEXT:   ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp
+  ; DARWIN-NEXT:   RET_ReallyLR
   ; WINDOWS-LABEL: name: test_varargs_2
   ; WINDOWS: bb.1 (%ir-block.0):
-  ; WINDOWS:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; WINDOWS:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
-  ; WINDOWS:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-  ; WINDOWS:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
-  ; WINDOWS:   $w0 = COPY [[C]](s32)
-  ; WINDOWS:   $x1 = COPY [[C1]](s64)
-  ; WINDOWS:   $x2 = COPY [[C2]](s64)
-  ; WINDOWS:   $x3 = COPY [[C3]](s64)
-  ; WINDOWS:   TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $x1, implicit $x2, implicit $x3
+  ; WINDOWS-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; WINDOWS-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+  ; WINDOWS-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; WINDOWS-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
+  ; WINDOWS-NEXT:   $w0 = COPY [[C]](s32)
+  ; WINDOWS-NEXT:   $x1 = COPY [[C1]](s64)
+  ; WINDOWS-NEXT:   $x2 = COPY [[C2]](s64)
+  ; WINDOWS-NEXT:   $x3 = COPY [[C3]](s64)
+  ; WINDOWS-NEXT:   TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $x1, implicit $x2, implicit $x3
   tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12, i64 314)
   ret void
 }
@@ -232,54 +238,56 @@ define void @test_varargs_3([8 x <2 x double>], <4 x half> %arg) {
 
   ; DARWIN-LABEL: name: test_varargs_3
   ; DARWIN: bb.1 (%ir-block.1):
-  ; DARWIN:   liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
-  ; DARWIN:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
-  ; DARWIN:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
-  ; DARWIN:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
-  ; DARWIN:   [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
-  ; DARWIN:   [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
-  ; DARWIN:   [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
-  ; DARWIN:   [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
-  ; DARWIN:   [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
-  ; DARWIN:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; DARWIN:   [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
-  ; DARWIN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; DARWIN:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
-  ; DARWIN:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-  ; DARWIN:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
-  ; DARWIN:   ADJCALLSTACKDOWN 8, 0, implicit-def $sp, implicit $sp
-  ; DARWIN:   $w0 = COPY [[C]](s32)
-  ; DARWIN:   $d0 = COPY [[C1]](s64)
-  ; DARWIN:   $x1 = COPY [[C2]](s64)
-  ; DARWIN:   [[COPY8:%[0-9]+]]:_(p0) = COPY $sp
-  ; DARWIN:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; DARWIN:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C4]](s64)
-  ; DARWIN:   G_STORE [[C3]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 1)
-  ; DARWIN:   BL @varargs, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
-  ; DARWIN:   ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp
-  ; DARWIN:   RET_ReallyLR
+  ; DARWIN-NEXT:   liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
+  ; DARWIN-NEXT: {{  $}}
+  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; DARWIN-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+  ; DARWIN-NEXT:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
+  ; DARWIN-NEXT:   [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
+  ; DARWIN-NEXT:   [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
+  ; DARWIN-NEXT:   [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
+  ; DARWIN-NEXT:   [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
+  ; DARWIN-NEXT:   [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
+  ; DARWIN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; DARWIN-NEXT:   [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
+  ; DARWIN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; DARWIN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+  ; DARWIN-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; DARWIN-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
+  ; DARWIN-NEXT:   ADJCALLSTACKDOWN 8, 0, implicit-def $sp, implicit $sp
+  ; DARWIN-NEXT:   $w0 = COPY [[C]](s32)
+  ; DARWIN-NEXT:   $d0 = COPY [[C1]](s64)
+  ; DARWIN-NEXT:   $x1 = COPY [[C2]](s64)
+  ; DARWIN-NEXT:   [[COPY8:%[0-9]+]]:_(p0) = COPY $sp
+  ; DARWIN-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; DARWIN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C4]](s64)
+  ; DARWIN-NEXT:   G_STORE [[C3]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 1)
+  ; DARWIN-NEXT:   BL @varargs, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
+  ; DARWIN-NEXT:   ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp
+  ; DARWIN-NEXT:   RET_ReallyLR
   ; WINDOWS-LABEL: name: test_varargs_3
   ; WINDOWS: bb.1 (%ir-block.1):
-  ; WINDOWS:   liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
-  ; WINDOWS:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
-  ; WINDOWS:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
-  ; WINDOWS:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
-  ; WINDOWS:   [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
-  ; WINDOWS:   [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
-  ; WINDOWS:   [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
-  ; WINDOWS:   [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
-  ; WINDOWS:   [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
-  ; WINDOWS:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; WINDOWS:   [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
-  ; WINDOWS:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; WINDOWS:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
-  ; WINDOWS:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-  ; WINDOWS:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
-  ; WINDOWS:   $w0 = COPY [[C]](s32)
-  ; WINDOWS:   $x1 = COPY [[C1]](s64)
-  ; WINDOWS:   $x2 = COPY [[C2]](s64)
-  ; WINDOWS:   $x3 = COPY [[C3]](s64)
-  ; WINDOWS:   TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $x1, implicit $x2, implicit $x3
+  ; WINDOWS-NEXT:   liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
+  ; WINDOWS-NEXT: {{  $}}
+  ; WINDOWS-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; WINDOWS-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+  ; WINDOWS-NEXT:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
+  ; WINDOWS-NEXT:   [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
+  ; WINDOWS-NEXT:   [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
+  ; WINDOWS-NEXT:   [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
+  ; WINDOWS-NEXT:   [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
+  ; WINDOWS-NEXT:   [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
+  ; WINDOWS-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; WINDOWS-NEXT:   [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
+  ; WINDOWS-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; WINDOWS-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+  ; WINDOWS-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; WINDOWS-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
+  ; WINDOWS-NEXT:   $w0 = COPY [[C]](s32)
+  ; WINDOWS-NEXT:   $x1 = COPY [[C1]](s64)
+  ; WINDOWS-NEXT:   $x2 = COPY [[C2]](s64)
+  ; WINDOWS-NEXT:   $x3 = COPY [[C3]](s64)
+  ; WINDOWS-NEXT:   TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $x1, implicit $x2, implicit $x3
   tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12, i64 314)
   ret void
 }
@@ -290,16 +298,16 @@ declare ghccc void @bad_call_conv_fn()
 define void @test_bad_call_conv() {
   ; DARWIN-LABEL: name: test_bad_call_conv
   ; DARWIN: bb.1 (%ir-block.0):
-  ; DARWIN:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
-  ; DARWIN:   BL @bad_call_conv_fn, csr_aarch64_noregs, implicit-def $lr, implicit $sp
-  ; DARWIN:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
-  ; DARWIN:   RET_ReallyLR
+  ; DARWIN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; DARWIN-NEXT:   BL @bad_call_conv_fn, csr_aarch64_noregs, implicit-def $lr, implicit $sp
+  ; DARWIN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; DARWIN-NEXT:   RET_ReallyLR
   ; WINDOWS-LABEL: name: test_bad_call_conv
   ; WINDOWS: bb.1 (%ir-block.0):
-  ; WINDOWS:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
-  ; WINDOWS:   BL @bad_call_conv_fn, csr_aarch64_noregs, implicit-def $lr, implicit $sp
-  ; WINDOWS:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
-  ; WINDOWS:   RET_ReallyLR
+  ; WINDOWS-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; WINDOWS-NEXT:   BL @bad_call_conv_fn, csr_aarch64_noregs, implicit-def $lr, implicit $sp
+  ; WINDOWS-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; WINDOWS-NEXT:   RET_ReallyLR
   tail call ghccc void @bad_call_conv_fn()
   ret void
 }
@@ -308,20 +316,20 @@ define void @test_bad_call_conv() {
 define void @test_byval(i8* byval(i8) %ptr) {
   ; DARWIN-LABEL: name: test_byval
   ; DARWIN: bb.1 (%ir-block.0):
-  ; DARWIN:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; DARWIN:   [[COPY:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX]](p0)
-  ; DARWIN:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
-  ; DARWIN:   BL @simple_fn, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
-  ; DARWIN:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
-  ; DARWIN:   RET_ReallyLR
+  ; DARWIN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX]](p0)
+  ; DARWIN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; DARWIN-NEXT:   BL @simple_fn, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; DARWIN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; DARWIN-NEXT:   RET_ReallyLR
   ; WINDOWS-LABEL: name: test_byval
   ; WINDOWS: bb.1 (%ir-block.0):
-  ; WINDOWS:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; WINDOWS:   [[COPY:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX]](p0)
-  ; WINDOWS:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
-  ; WINDOWS:   BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
-  ; WINDOWS:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
-  ; WINDOWS:   RET_ReallyLR
+  ; WINDOWS-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; WINDOWS-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX]](p0)
+  ; WINDOWS-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; WINDOWS-NEXT:   BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; WINDOWS-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; WINDOWS-NEXT:   RET_ReallyLR
   tail call void @simple_fn()
   ret void
 }
@@ -330,20 +338,22 @@ define void @test_byval(i8* byval(i8) %ptr) {
 define void @test_inreg(i8* inreg %ptr) {
   ; DARWIN-LABEL: name: test_inreg
   ; DARWIN: bb.1 (%ir-block.0):
-  ; DARWIN:   liveins: $x0
-  ; DARWIN:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-  ; DARWIN:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
-  ; DARWIN:   BL @simple_fn, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
-  ; DARWIN:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
-  ; DARWIN:   RET_ReallyLR
+  ; DARWIN-NEXT:   liveins: $x0
+  ; DARWIN-NEXT: {{  $}}
+  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; DARWIN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; DARWIN-NEXT:   BL @simple_fn, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; DARWIN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; DARWIN-NEXT:   RET_ReallyLR
   ; WINDOWS-LABEL: name: test_inreg
   ; WINDOWS: bb.1 (%ir-block.0):
-  ; WINDOWS:   liveins: $x0
-  ; WINDOWS:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-  ; WINDOWS:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
-  ; WINDOWS:   BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
-  ; WINDOWS:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
-  ; WINDOWS:   RET_ReallyLR
+  ; WINDOWS-NEXT:   liveins: $x0
+  ; WINDOWS-NEXT: {{  $}}
+  ; WINDOWS-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; WINDOWS-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; WINDOWS-NEXT:   BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; WINDOWS-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; WINDOWS-NEXT:   RET_ReallyLR
   tail call void @simple_fn()
   ret void
 }
@@ -352,10 +362,10 @@ declare fastcc void @fast_fn()
 define void @test_mismatched_caller() {
   ; DARWIN-LABEL: name: test_mismatched_caller
   ; DARWIN: bb.1 (%ir-block.0):
-  ; DARWIN:   TCRETURNdi @fast_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp
+  ; DARWIN-NEXT:   TCRETURNdi @fast_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp
   ; WINDOWS-LABEL: name: test_mismatched_caller
   ; WINDOWS: bb.1 (%ir-block.0):
-  ; WINDOWS:   TCRETURNdi @fast_fn, 0, csr_aarch64_aapcs, implicit $sp
+  ; WINDOWS-NEXT:   TCRETURNdi @fast_fn, 0, csr_aarch64_aapcs, implicit $sp
   tail call fastcc void @fast_fn()
   ret void
 }
@@ -365,10 +375,10 @@ declare void @llvm.assume(i1)
 define void @test_assume() local_unnamed_addr {
   ; DARWIN-LABEL: name: test_assume
   ; DARWIN: bb.1.entry:
-  ; DARWIN:   TCRETURNdi @nonvoid_ret, 0, csr_darwin_aarch64_aapcs, implicit $sp
+  ; DARWIN-NEXT:   TCRETURNdi @nonvoid_ret, 0, csr_darwin_aarch64_aapcs, implicit $sp
   ; WINDOWS-LABEL: name: test_assume
   ; WINDOWS: bb.1.entry:
-  ; WINDOWS:   TCRETURNdi @nonvoid_ret, 0, csr_aarch64_aapcs, implicit $sp
+  ; WINDOWS-NEXT:   TCRETURNdi @nonvoid_ret, 0, csr_aarch64_aapcs, implicit $sp
 entry:
   %x = tail call i32 @nonvoid_ret()
   %y = icmp ne i32 %x, 0
@@ -381,14 +391,14 @@ declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
 define void @test_lifetime() local_unnamed_addr {
   ; DARWIN-LABEL: name: test_lifetime
   ; DARWIN: bb.1.entry:
-  ; DARWIN:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.t
-  ; DARWIN:   LIFETIME_START %stack.0.t
-  ; DARWIN:   TCRETURNdi @nonvoid_ret, 0, csr_darwin_aarch64_aapcs, implicit $sp
+  ; DARWIN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.t
+  ; DARWIN-NEXT:   LIFETIME_START %stack.0.t
+  ; DARWIN-NEXT:   TCRETURNdi @nonvoid_ret, 0, csr_darwin_aarch64_aapcs, implicit $sp
   ; WINDOWS-LABEL: name: test_lifetime
   ; WINDOWS: bb.1.entry:
-  ; WINDOWS:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.t
-  ; WINDOWS:   LIFETIME_START %stack.0.t
-  ; WINDOWS:   TCRETURNdi @nonvoid_ret, 0, csr_aarch64_aapcs, implicit $sp
+  ; WINDOWS-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.t
+  ; WINDOWS-NEXT:   LIFETIME_START %stack.0.t
+  ; WINDOWS-NEXT:   TCRETURNdi @nonvoid_ret, 0, csr_aarch64_aapcs, implicit $sp
 entry:
   %t = alloca i8, align 1
   call void @llvm.lifetime.start.p0i8(i64 1, i8* %t)
@@ -405,24 +415,26 @@ declare i8* @pluto()
 define hidden swiftcc i64 @swiftself_indirect_tail(i64* swiftself %arg) {
   ; DARWIN-LABEL: name: swiftself_indirect_tail
   ; DARWIN: bb.1 (%ir-block.0):
-  ; DARWIN:   liveins: $x20
-  ; DARWIN:   [[COPY:%[0-9]+]]:_(p0) = COPY $x20
-  ; DARWIN:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
-  ; DARWIN:   BL @pluto, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0
-  ; DARWIN:   [[COPY1:%[0-9]+]]:tcgpr64(p0) = COPY $x0
-  ; DARWIN:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
-  ; DARWIN:   $x20 = COPY [[COPY]](p0)
-  ; DARWIN:   TCRETURNri [[COPY1]](p0), 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $x20
+  ; DARWIN-NEXT:   liveins: $x20
+  ; DARWIN-NEXT: {{  $}}
+  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x20
+  ; DARWIN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; DARWIN-NEXT:   BL @pluto, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0
+  ; DARWIN-NEXT:   [[COPY1:%[0-9]+]]:tcgpr64(p0) = COPY $x0
+  ; DARWIN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; DARWIN-NEXT:   $x20 = COPY [[COPY]](p0)
+  ; DARWIN-NEXT:   TCRETURNri [[COPY1]](p0), 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $x20
   ; WINDOWS-LABEL: name: swiftself_indirect_tail
   ; WINDOWS: bb.1 (%ir-block.0):
-  ; WINDOWS:   liveins: $x20
-  ; WINDOWS:   [[COPY:%[0-9]+]]:_(p0) = COPY $x20
-  ; WINDOWS:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
-  ; WINDOWS:   BL @pluto, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0
-  ; WINDOWS:   [[COPY1:%[0-9]+]]:tcgpr64(p0) = COPY $x0
-  ; WINDOWS:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
-  ; WINDOWS:   $x20 = COPY [[COPY]](p0)
-  ; WINDOWS:   TCRETURNri [[COPY1]](p0), 0, csr_aarch64_aapcs, implicit $sp, implicit $x20
+  ; WINDOWS-NEXT:   liveins: $x20
+  ; WINDOWS-NEXT: {{  $}}
+  ; WINDOWS-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x20
+  ; WINDOWS-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; WINDOWS-NEXT:   BL @pluto, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0
+  ; WINDOWS-NEXT:   [[COPY1:%[0-9]+]]:tcgpr64(p0) = COPY $x0
+  ; WINDOWS-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; WINDOWS-NEXT:   $x20 = COPY [[COPY]](p0)
+  ; WINDOWS-NEXT:   TCRETURNri [[COPY1]](p0), 0, csr_aarch64_aapcs, implicit $sp, implicit $x20
   %tmp = call i8* @pluto()
   %tmp1 = bitcast i8* %tmp to i64 (i64*)*
   %tmp2 = tail call swiftcc i64 %tmp1(i64* swiftself %arg)
@@ -434,18 +446,20 @@ declare void @must_callee(i8*)
 define void @foo(i32*) {
   ; DARWIN-LABEL: name: foo
   ; DARWIN: bb.1 (%ir-block.1):
-  ; DARWIN:   liveins: $x0
-  ; DARWIN:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-  ; DARWIN:   [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
-  ; DARWIN:   $x0 = COPY [[C]](p0)
-  ; DARWIN:   TCRETURNdi @must_callee, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $x0
+  ; DARWIN-NEXT:   liveins: $x0
+  ; DARWIN-NEXT: {{  $}}
+  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; DARWIN-NEXT:   [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+  ; DARWIN-NEXT:   $x0 = COPY [[C]](p0)
+  ; DARWIN-NEXT:   TCRETURNdi @must_callee, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $x0
   ; WINDOWS-LABEL: name: foo
   ; WINDOWS: bb.1 (%ir-block.1):
-  ; WINDOWS:   liveins: $x0
-  ; WINDOWS:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-  ; WINDOWS:   [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
-  ; WINDOWS:   $x0 = COPY [[C]](p0)
-  ; WINDOWS:   TCRETURNdi @must_callee, 0, csr_aarch64_aapcs, implicit $sp, implicit $x0
+  ; WINDOWS-NEXT:   liveins: $x0
+  ; WINDOWS-NEXT: {{  $}}
+  ; WINDOWS-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; WINDOWS-NEXT:   [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+  ; WINDOWS-NEXT:   $x0 = COPY [[C]](p0)
+  ; WINDOWS-NEXT:   TCRETURNdi @must_callee, 0, csr_aarch64_aapcs, implicit $sp, implicit $x0
   musttail call void @must_callee(i8* null)
   ret void
 }
@@ -456,24 +470,26 @@ declare void @outgoing_v16f16(<16 x half>)
 define void @test_tail_call_outgoing_v16f16(<16 x half> %arg) {
   ; DARWIN-LABEL: name: test_tail_call_outgoing_v16f16
   ; DARWIN: bb.1 (%ir-block.0):
-  ; DARWIN:   liveins: $q0, $q1
-  ; DARWIN:   [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
-  ; DARWIN:   [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
-  ; DARWIN:   [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<8 x s16>), [[COPY1]](<8 x s16>)
-  ; DARWIN:   [[UV:%[0-9]+]]:_(<8 x s16>), [[UV1:%[0-9]+]]:_(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>)
-  ; DARWIN:   $q0 = COPY [[UV]](<8 x s16>)
-  ; DARWIN:   $q1 = COPY [[UV1]](<8 x s16>)
-  ; DARWIN:   TCRETURNdi @outgoing_v16f16, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1
+  ; DARWIN-NEXT:   liveins: $q0, $q1
+  ; DARWIN-NEXT: {{  $}}
+  ; DARWIN-NEXT:   [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+  ; DARWIN-NEXT:   [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
+  ; DARWIN-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<8 x s16>), [[COPY1]](<8 x s16>)
+  ; DARWIN-NEXT:   [[UV:%[0-9]+]]:_(<8 x s16>), [[UV1:%[0-9]+]]:_(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>)
+  ; DARWIN-NEXT:   $q0 = COPY [[UV]](<8 x s16>)
+  ; DARWIN-NEXT:   $q1 = COPY [[UV1]](<8 x s16>)
+  ; DARWIN-NEXT:   TCRETURNdi @outgoing_v16f16, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1
   ; WINDOWS-LABEL: name: test_tail_call_outgoing_v16f16
   ; WINDOWS: bb.1 (%ir-block.0):
-  ; WINDOWS:   liveins: $q0, $q1
-  ; WINDOWS:   [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
-  ; WINDOWS:   [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
-  ; WINDOWS:   [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<8 x s16>), [[COPY1]](<8 x s16>)
-  ; WINDOWS:   [[UV:%[0-9]+]]:_(<8 x s16>), [[UV1:%[0-9]+]]:_(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>)
-  ; WINDOWS:   $q0 = COPY [[UV]](<8 x s16>)
-  ; WINDOWS:   $q1 = COPY [[UV1]](<8 x s16>)
-  ; WINDOWS:   TCRETURNdi @outgoing_v16f16, 0, csr_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1
+  ; WINDOWS-NEXT:   liveins: $q0, $q1
+  ; WINDOWS-NEXT: {{  $}}
+  ; WINDOWS-NEXT:   [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+  ; WINDOWS-NEXT:   [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
+  ; WINDOWS-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<8 x s16>), [[COPY1]](<8 x s16>)
+  ; WINDOWS-NEXT:   [[UV:%[0-9]+]]:_(<8 x s16>), [[UV1:%[0-9]+]]:_(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>)
+  ; WINDOWS-NEXT:   $q0 = COPY [[UV]](<8 x s16>)
+  ; WINDOWS-NEXT:   $q1 = COPY [[UV1]](<8 x s16>)
+  ; WINDOWS-NEXT:   TCRETURNdi @outgoing_v16f16, 0, csr_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1
   tail call void @outgoing_v16f16(<16 x half> %arg)
   ret void
 }

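(For readers unfamiliar with the regenerated form above: a plain `; CHECK:` line lets FileCheck skip arbitrary output before matching, while `; CHECK-NEXT:` must match the immediately following line, and the autogenerated `{{  $}}` pattern matches the two-space line the MIR printer emits after a `liveins:` list. Below is a minimal sketch of a test in the new style; the function, RUN line, and exact instruction order are illustrative assumptions, not part of this commit.)

; RUN: llc -mtriple=aarch64-linux-gnu -O0 -stop-after=irtranslator -global-isel %s -o - | FileCheck %s
define i32 @add_one(i32 %x) {
  ; CHECK-LABEL: name: add_one
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $w0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C]]
  ; CHECK-NEXT:   $w0 = COPY [[ADD]](s32)
  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
  %r = add i32 %x, 1
  ret i32 %r
}

(Checks of this shape can be regenerated by pointing llvm/utils/update_mir_test_checks.py at the test file, which, per the NOTE lines, is how the assertions in the files below were produced.)
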
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-arguments.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-arguments.ll
index 5076b3c9d5e9a..034018b34aab6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-arguments.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-arguments.ll
@@ -1,42 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
 
 declare void @use_s128(i128 %a, i128 %b)
 
-; Check partially passing a split type on the stack (s128 -> 2 x s64)
-; CHECK-LABEL: name: call_use_s128
-; CHECK: fixedStack:
-; CHECK:  - { id: 0, type: default, offset: 16, size: 4, alignment: 16
-; CHECK:  - { id: 1, type: default, offset: 8, size: 8, alignment: 8, stack-id: default,
-; CHECK:  - { id: 2, type: default, offset: 0, size: 8, alignment: 16, stack-id: default,
-; CHECK: bb.1.entry:
-; CHECK:   liveins: $w0, $w4, $w5, $w6, $x2, $x3
-; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
-; CHECK:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x2
-; CHECK:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x3
-; CHECK:   [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY1]](s64), [[COPY2]](s64)
-; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $w4
-; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $w5
-; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $w6
-; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
-; CHECK:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64)  from %fixed-stack.2, align 16)
-; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
-; CHECK:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s64)  from %fixed-stack.1)
-; CHECK:   [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LOAD]](s64), [[LOAD1]](s64)
-; CHECK:   [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16)
-; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
-; CHECK:   [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV]](s128)
-; CHECK:   $x0 = COPY [[UV]](s64)
-; CHECK:   $x1 = COPY [[UV1]](s64)
-; CHECK:   [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV1]](s128)
-; CHECK:   $x2 = COPY [[UV2]](s64)
-; CHECK:   $x3 = COPY [[UV3]](s64)
-; CHECK:   BL @use_s128, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3
-; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
-; CHECK:   $w0 = COPY [[C]](s32)
-; CHECK:   RET_ReallyLR implicit $w0
 define i32 @call_use_s128(i32 %p1, i128 %p2, i32 %p3, i32 %p4, i32 %p5, i128 %p6, i32 %p7) {
+  ; Check partially passing a split type on the stack (s128 -> 2 x s64)
+  ; CHECK-LABEL: name: call_use_s128
+  ; CHECK: bb.1.entry:
+  ; CHECK-NEXT:   liveins: $w0, $w4, $w5, $w6, $x2, $x3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x2
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x3
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY1]](s64), [[COPY2]](s64)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $w4
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $w5
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $w6
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.2, align 16)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s64) from %fixed-stack.1)
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LOAD]](s64), [[LOAD1]](s64)
+  ; CHECK-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV]](s128)
+  ; CHECK-NEXT:   $x0 = COPY [[UV]](s64)
+  ; CHECK-NEXT:   $x1 = COPY [[UV1]](s64)
+  ; CHECK-NEXT:   [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV1]](s128)
+  ; CHECK-NEXT:   $x2 = COPY [[UV2]](s64)
+  ; CHECK-NEXT:   $x3 = COPY [[UV3]](s64)
+  ; CHECK-NEXT:   BL @use_s128, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   $w0 = COPY [[C]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
 entry:
   call void @use_s128(i128 %p2, i128 %p6)
   ret i32 0

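(As an aside on the split-type handling checked above: on AArch64, an i128 that still fits in registers is passed as two x-registers and reassembled by the IRTranslator with G_MERGE_VALUES, then split back with G_UNMERGE_VALUES on the way out. A minimal sketch follows, with a hypothetical function name and the same illustrative RUN line as the earlier sketch:)

define i128 @pass_i128(i128 %v) {
  ; CHECK-LABEL: name: pass_i128
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $x0, $x1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV]](s128)
  ; CHECK-NEXT:   $x0 = COPY [[UV]](s64)
  ; CHECK-NEXT:   $x1 = COPY [[UV1]](s64)
  ; CHECK-NEXT:   RET_ReallyLR implicit $x0, implicit $x1
  ret i128 %v
}

(call_use_s128 above goes further: with the register pairs exhausted, the second i128 spills to two fixed stack slots, which is the "partially on the stack" case its comment describes.)
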
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll
index 2a7c3bc36a8f0..d314081f07527 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -O0 -mtriple=aarch64-apple-ios -global-isel -stop-after=irtranslator %s -o - | FileCheck %s
 
 @_ZTIi = external global i8*
@@ -6,30 +7,40 @@ declare i32 @foo(i32)
 declare i32 @__gxx_personality_v0(...)
 declare i32 @llvm.eh.typeid.for(i8*)
 
-; CHECK-LABEL: name: bar
-; CHECK: body:
-; CHECK-NEXT:   bb.1 (%ir-block.0):
-; CHECK:     successors: %[[GOOD:bb.[0-9]+]]{{.*}}%[[BAD:bb.[0-9]+]]
-; CHECK:     EH_LABEL
-; CHECK:     $w0 = COPY
-; CHECK:     BL @foo, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit-def $w0
-; CHECK:     {{%[0-9]+}}:_(s32) = COPY $w0
-; CHECK:     EH_LABEL
-; CHECK:     G_BR %[[GOOD]]
-
-; CHECK:   [[BAD]].{{[a-z]+}} (landing-pad):
-; CHECK:     EH_LABEL
-; CHECK:     [[PTR_RET:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK:     [[SEL_PTR:%[0-9]+]]:_(p0) = COPY $x1
-; CHECK:     [[SEL_RET:%[0-9]+]]:_(s32) = G_PTRTOINT [[SEL_PTR]]
-; CHECK:     $x0 = COPY [[PTR_RET]]
-; CHECK:     $w1 = COPY [[SEL_RET]]
-
-; CHECK:   [[GOOD]].{{[a-z]+}}:
-; CHECK:     [[SEL:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK:     $w1 = COPY [[SEL]]
-
 define { i8*, i32 } @bar() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+  ; CHECK-LABEL: name: bar
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   EH_LABEL <mcsymbol >
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   $w0 = COPY [[C]](s32)
+  ; CHECK-NEXT:   BL @foo, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit-def $w0
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   EH_LABEL <mcsymbol >
+  ; CHECK-NEXT:   G_BR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.broken (landing-pad):
+  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   EH_LABEL <mcsymbol >
+  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK-NEXT:   [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY2]](p0)
+  ; CHECK-NEXT:   $x0 = COPY [[COPY1]](p0)
+  ; CHECK-NEXT:   $w1 = COPY [[PTRTOINT]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0, implicit $w1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.continue:
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   $x0 = COPY [[DEF]](p0)
+  ; CHECK-NEXT:   $w1 = COPY [[C1]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0, implicit $w1
   %res32 = invoke i32 @foo(i32 42) to label %continue unwind label %broken
 
 
@@ -43,10 +54,32 @@ continue:
   ret { i8*, i32 } %res.good
 }
 
-; CHECK-LABEL: name: test_invoke_indirect
-; CHECK: [[CALLEE:%[0-9]+]]:gpr64(p0) = COPY $x0
-; CHECK: BLR [[CALLEE]]
 define void @test_invoke_indirect(void()* %callee) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+  ; CHECK-LABEL: name: test_invoke_indirect
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64(p0) = COPY $x0
+  ; CHECK-NEXT:   EH_LABEL <mcsymbol >
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   BLR [[COPY]](p0), csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   EH_LABEL <mcsymbol >
+  ; CHECK-NEXT:   G_BR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.broken (landing-pad):
+  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   EH_LABEL <mcsymbol >
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK-NEXT:   [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY2]](p0)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.continue:
+  ; CHECK-NEXT:   RET_ReallyLR
   invoke void %callee() to label %continue unwind label %broken
 
 broken:
@@ -57,27 +90,43 @@ continue:
   ret void
 }
 
-; CHECK-LABEL: name: test_invoke_varargs
-
-; CHECK: [[NULL:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
-; CHECK: [[ANSWER:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-; CHECK: [[ONE:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.0
-
-; CHECK: $x0 = COPY [[NULL]]
-
-; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp
-; CHECK: [[OFFSET:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-; CHECK: [[SLOT:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[OFFSET]](s64)
-; CHECK: [[ANSWER_EXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ANSWER]]
-; CHECK: G_STORE [[ANSWER_EXT]](s64), [[SLOT]]
-
-; CHECK: [[OFFSET:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-; CHECK: [[SLOT:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[OFFSET]](s64)
-; CHECK: G_STORE [[ONE]](s32), [[SLOT]]
-
-; CHECK: BL @printf
 declare void @printf(i8*, ...)
 define void @test_invoke_varargs() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+  ; CHECK-LABEL: name: test_invoke_varargs
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+  ; CHECK-NEXT:   EH_LABEL <mcsymbol >
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   $x0 = COPY [[C]](p0)
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $sp
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+  ; CHECK-NEXT:   G_STORE [[ANYEXT]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 1)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+  ; CHECK-NEXT:   G_STORE [[C2]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 8, align 1)
+  ; CHECK-NEXT:   BL @printf, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0
+  ; CHECK-NEXT:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   EH_LABEL <mcsymbol >
+  ; CHECK-NEXT:   G_BR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.broken (landing-pad):
+  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   EH_LABEL <mcsymbol >
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK-NEXT:   [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY2]](p0)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.continue:
+  ; CHECK-NEXT:   RET_ReallyLR
   invoke void(i8*, ...) @printf(i8* null, i32 42, float 1.0) to label %continue unwind label %broken
 
 broken:
@@ -88,29 +137,44 @@ continue:
   ret void
 }
 
-; CHECK-LABEL: name: test_lpad_phi
-; CHECK: body:
-; CHECK-NEXT:   bb.1 (%ir-block.0):
-; CHECK:     successors: %[[GOOD:bb.[0-9]+]]{{.*}}%[[BAD:bb.[0-9]+]]
-; CHECK:     [[ELEVEN:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
-; CHECK:     EH_LABEL
-; CHECK:     BL @may_throw, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
-; CHECK:     EH_LABEL
-; CHECK:     G_BR %[[GOOD]]
-
-; CHECK:   [[BAD]].{{[a-z]+}} (landing-pad):
-; CHECK:     [[PHI_ELEVEN:%[0-9]+]]:_(s32) = G_PHI [[ELEVEN]](s32), %bb.1
-; CHECK:     EH_LABEL
-; CHECK:     G_STORE [[PHI_ELEVEN]](s32), {{%[0-9]+}}(p0) :: (store (s32) into @global_var)
-
-; CHECK:   [[GOOD]].{{[a-z]+}}:
-; CHECK:     [[SEL:%[0-9]+]]:_(s32) = G_PHI
-; CHECK:     $w0 = COPY [[SEL]]
-
 @global_var = external global i32
 
 declare void @may_throw()
 define i32 @test_lpad_phi() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+  ; CHECK-LABEL: name: test_lpad_phi
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @global_var
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 55
+  ; CHECK-NEXT:   G_STORE [[C]](s32), [[GV]](p0) :: (store (s32) into @global_var)
+  ; CHECK-NEXT:   EH_LABEL <mcsymbol >
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   BL @may_throw, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   EH_LABEL <mcsymbol >
+  ; CHECK-NEXT:   G_BR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.lpad (landing-pad):
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.1
+  ; CHECK-NEXT:   EH_LABEL <mcsymbol >
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK-NEXT:   [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p0)
+  ; CHECK-NEXT:   G_STORE [[PHI]](s32), [[GV]](p0) :: (store (s32) into @global_var)
+  ; CHECK-NEXT:   G_BR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.continue:
+  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C2]](s32), %bb.1, [[C3]](s32), %bb.2
+  ; CHECK-NEXT:   $w0 = COPY [[PHI1]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
   store i32 42, i32* @global_var
   invoke void @may_throw()
           to label %continue unwind label %lpad

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-s128-div.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-s128-div.mir
index 810c4d5f6c360..8b0de6dfe4271 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-s128-div.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-s128-div.mir
@@ -23,24 +23,25 @@ body:             |
 
     ; CHECK-LABEL: name: udiv_test
     ; CHECK: liveins: $x0, $x1
-    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128) from %ir.v1ptr)
-    ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.v2ptr)
-    ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
-    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128)
-    ; CHECK: $x0 = COPY [[UV]](s64)
-    ; CHECK: $x1 = COPY [[UV1]](s64)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD1]](s128)
-    ; CHECK: $x2 = COPY [[UV2]](s64)
-    ; CHECK: $x3 = COPY [[UV3]](s64)
-    ; CHECK: BL &__udivti3, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit-def $x0, implicit-def $x1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x0
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x1
-    ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY2]](s64), [[COPY3]](s64)
-    ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
-    ; CHECK: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128) into %ir.v1ptr)
-    ; CHECK: RET_ReallyLR
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128) from %ir.v1ptr)
+    ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.v2ptr)
+    ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128)
+    ; CHECK-NEXT: $x0 = COPY [[UV]](s64)
+    ; CHECK-NEXT: $x1 = COPY [[UV1]](s64)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD1]](s128)
+    ; CHECK-NEXT: $x2 = COPY [[UV2]](s64)
+    ; CHECK-NEXT: $x3 = COPY [[UV3]](s64)
+    ; CHECK-NEXT: BL &__udivti3, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit-def $x0, implicit-def $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY2]](s64), [[COPY3]](s64)
+    ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+    ; CHECK-NEXT: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128) into %ir.v1ptr)
+    ; CHECK-NEXT: RET_ReallyLR
     %0:_(p0) = COPY $x0
     %1:_(p0) = COPY $x1
     %2:_(s128) = G_LOAD %0(p0) :: (load (s128) from %ir.v1ptr)
@@ -64,24 +65,25 @@ body:             |
 
     ; CHECK-LABEL: name: sdiv_test
     ; CHECK: liveins: $x0, $x1
-    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128) from %ir.v1ptr)
-    ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.v2ptr)
-    ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
-    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128)
-    ; CHECK: $x0 = COPY [[UV]](s64)
-    ; CHECK: $x1 = COPY [[UV1]](s64)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD1]](s128)
-    ; CHECK: $x2 = COPY [[UV2]](s64)
-    ; CHECK: $x3 = COPY [[UV3]](s64)
-    ; CHECK: BL &__divti3, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit-def $x0, implicit-def $x1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x0
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x1
-    ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY2]](s64), [[COPY3]](s64)
-    ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
-    ; CHECK: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128) into %ir.v1ptr)
-    ; CHECK: RET_ReallyLR
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128) from %ir.v1ptr)
+    ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.v2ptr)
+    ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128)
+    ; CHECK-NEXT: $x0 = COPY [[UV]](s64)
+    ; CHECK-NEXT: $x1 = COPY [[UV1]](s64)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD1]](s128)
+    ; CHECK-NEXT: $x2 = COPY [[UV2]](s64)
+    ; CHECK-NEXT: $x3 = COPY [[UV3]](s64)
+    ; CHECK-NEXT: BL &__divti3, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit-def $x0, implicit-def $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY2]](s64), [[COPY3]](s64)
+    ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+    ; CHECK-NEXT: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128) into %ir.v1ptr)
+    ; CHECK-NEXT: RET_ReallyLR
     %0:_(p0) = COPY $x0
     %1:_(p0) = COPY $x1
     %2:_(s128) = G_LOAD %0(p0) :: (load (s128) from %ir.v1ptr)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
index a1fb4dbb54429..72fbdf7fbb75f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
@@ -11,102 +11,104 @@ declare hidden void @external_void_func_v32i32(<32 x i32>) #0
 define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
   ; GFX900-LABEL: name: test_call_external_void_func_i32
   ; GFX900: bb.1 (%ir-block.1):
-  ; GFX900:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; GFX900:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GFX900:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GFX900:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX900:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; GFX900:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; GFX900:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX900:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GFX900:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GFX900:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX900:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; GFX900:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; GFX900:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; GFX900:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GFX900:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX900:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GFX900:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX900:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; GFX900:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
-  ; GFX900:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; GFX900:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GFX900:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GFX900:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX900:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX900:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GFX900:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GFX900:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX900:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; GFX900:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; GFX900:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GFX900:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX900:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; GFX900:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GFX900:   $vgpr0 = COPY [[C]](s32)
-  ; GFX900:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX900:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GFX900:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GFX900:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GFX900:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX900:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; GFX900:   $sgpr12 = COPY [[COPY14]](s32)
-  ; GFX900:   $sgpr13 = COPY [[COPY15]](s32)
-  ; GFX900:   $sgpr14 = COPY [[COPY16]](s32)
-  ; GFX900:   $vgpr31 = COPY [[OR1]](s32)
-  ; GFX900:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GFX900:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GFX900:   S_ENDPGM 0
+  ; GFX900-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; GFX900-NEXT: {{  $}}
+  ; GFX900-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GFX900-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; GFX900-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; GFX900-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; GFX900-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GFX900-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX900-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GFX900-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX900-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GFX900-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GFX900-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX900-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GFX900-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; GFX900-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GFX900-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; GFX900-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GFX900-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GFX900-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GFX900-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GFX900-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GFX900-NEXT:   S_ENDPGM 0
   ; GFX908-LABEL: name: test_call_external_void_func_i32
   ; GFX908: bb.1 (%ir-block.1):
-  ; GFX908:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; GFX908:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GFX908:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GFX908:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX908:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; GFX908:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; GFX908:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX908:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GFX908:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GFX908:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX908:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; GFX908:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; GFX908:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; GFX908:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GFX908:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX908:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GFX908:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX908:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; GFX908:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
-  ; GFX908:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; GFX908:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GFX908:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GFX908:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX908:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX908:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GFX908:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GFX908:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX908:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; GFX908:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; GFX908:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GFX908:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX908:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; GFX908:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GFX908:   $vgpr0 = COPY [[C]](s32)
-  ; GFX908:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX908:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GFX908:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GFX908:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GFX908:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX908:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; GFX908:   $sgpr12 = COPY [[COPY14]](s32)
-  ; GFX908:   $sgpr13 = COPY [[COPY15]](s32)
-  ; GFX908:   $sgpr14 = COPY [[COPY16]](s32)
-  ; GFX908:   $vgpr31 = COPY [[OR1]](s32)
-  ; GFX908:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GFX908:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GFX908:   S_ENDPGM 0
+  ; GFX908-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; GFX908-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; GFX908-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; GFX908-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GFX908-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX908-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GFX908-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX908-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GFX908-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GFX908-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX908-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GFX908-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; GFX908-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GFX908-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; GFX908-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GFX908-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GFX908-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GFX908-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GFX908-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GFX908-NEXT:   S_ENDPGM 0
   call void @external_void_func_i32(i32 42)
   ret void
 }
@@ -114,80 +116,82 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
 define void @test_func_call_external_void_func_i32() #0 {
   ; GFX900-LABEL: name: test_func_call_external_void_func_i32
   ; GFX900: bb.1 (%ir-block.0):
-  ; GFX900:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GFX900:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GFX900:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX900:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GFX900:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GFX900:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GFX900:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX900:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GFX900:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX900:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GFX900:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99
-  ; GFX900:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GFX900:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX900:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX900:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GFX900:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GFX900:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GFX900:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX900:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GFX900:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GFX900:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GFX900:   $vgpr0 = COPY [[C]](s32)
-  ; GFX900:   [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GFX900:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
-  ; GFX900:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GFX900:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GFX900:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
-  ; GFX900:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
-  ; GFX900:   $sgpr12 = COPY [[COPY13]](s32)
-  ; GFX900:   $sgpr13 = COPY [[COPY14]](s32)
-  ; GFX900:   $sgpr14 = COPY [[COPY15]](s32)
-  ; GFX900:   $vgpr31 = COPY [[COPY16]](s32)
-  ; GFX900:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GFX900:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GFX900:   [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
-  ; GFX900:   S_SETPC_B64_return [[COPY18]]
+  ; GFX900-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; GFX900-NEXT: {{  $}}
+  ; GFX900-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; GFX900-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GFX900-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99
+  ; GFX900-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GFX900-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX900-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; GFX900-NEXT:   [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
+  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GFX900-NEXT:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; GFX900-NEXT:   $vgpr31 = COPY [[COPY16]](s32)
+  ; GFX900-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GFX900-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GFX900-NEXT:   [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
+  ; GFX900-NEXT:   S_SETPC_B64_return [[COPY18]]
   ; GFX908-LABEL: name: test_func_call_external_void_func_i32
   ; GFX908: bb.1 (%ir-block.0):
-  ; GFX908:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GFX908:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GFX908:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX908:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GFX908:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GFX908:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GFX908:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX908:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GFX908:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX908:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GFX908:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99
-  ; GFX908:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GFX908:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX908:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX908:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GFX908:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GFX908:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GFX908:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX908:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GFX908:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GFX908:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GFX908:   $vgpr0 = COPY [[C]](s32)
-  ; GFX908:   [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GFX908:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
-  ; GFX908:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GFX908:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GFX908:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
-  ; GFX908:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
-  ; GFX908:   $sgpr12 = COPY [[COPY13]](s32)
-  ; GFX908:   $sgpr13 = COPY [[COPY14]](s32)
-  ; GFX908:   $sgpr14 = COPY [[COPY15]](s32)
-  ; GFX908:   $vgpr31 = COPY [[COPY16]](s32)
-  ; GFX908:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GFX908:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GFX908:   [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
-  ; GFX908:   S_SETPC_B64_return [[COPY18]]
+  ; GFX908-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GFX908-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99
+  ; GFX908-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GFX908-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX908-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; GFX908-NEXT:   [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
+  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GFX908-NEXT:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; GFX908-NEXT:   $vgpr31 = COPY [[COPY16]](s32)
+  ; GFX908-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GFX908-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GFX908-NEXT:   [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
+  ; GFX908-NEXT:   S_SETPC_B64_return [[COPY18]]
   call void @external_void_func_i32(i32 99)
   ret void
 }
@@ -197,174 +201,176 @@ define void @test_func_call_external_void_func_i32() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
   ; GFX900-LABEL: name: test_call_external_void_func_v32i32
   ; GFX900: bb.1 (%ir-block.1):
-  ; GFX900:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; GFX900:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GFX900:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GFX900:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX900:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; GFX900:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; GFX900:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX900:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GFX900:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GFX900:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX900:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; GFX900:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GFX900:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
-  ; GFX900:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; GFX900:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GFX900:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
-  ; GFX900:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GFX900:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX900:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; GFX900:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
-  ; GFX900:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; GFX900:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GFX900:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GFX900:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX900:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX900:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GFX900:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GFX900:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX900:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; GFX900:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; GFX900:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GFX900:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX900:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; GFX900:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GFX900:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
-  ; GFX900:   $vgpr0 = COPY [[UV]](s32)
-  ; GFX900:   $vgpr1 = COPY [[UV1]](s32)
-  ; GFX900:   $vgpr2 = COPY [[UV2]](s32)
-  ; GFX900:   $vgpr3 = COPY [[UV3]](s32)
-  ; GFX900:   $vgpr4 = COPY [[UV4]](s32)
-  ; GFX900:   $vgpr5 = COPY [[UV5]](s32)
-  ; GFX900:   $vgpr6 = COPY [[UV6]](s32)
-  ; GFX900:   $vgpr7 = COPY [[UV7]](s32)
-  ; GFX900:   $vgpr8 = COPY [[UV8]](s32)
-  ; GFX900:   $vgpr9 = COPY [[UV9]](s32)
-  ; GFX900:   $vgpr10 = COPY [[UV10]](s32)
-  ; GFX900:   $vgpr11 = COPY [[UV11]](s32)
-  ; GFX900:   $vgpr12 = COPY [[UV12]](s32)
-  ; GFX900:   $vgpr13 = COPY [[UV13]](s32)
-  ; GFX900:   $vgpr14 = COPY [[UV14]](s32)
-  ; GFX900:   $vgpr15 = COPY [[UV15]](s32)
-  ; GFX900:   $vgpr16 = COPY [[UV16]](s32)
-  ; GFX900:   $vgpr17 = COPY [[UV17]](s32)
-  ; GFX900:   $vgpr18 = COPY [[UV18]](s32)
-  ; GFX900:   $vgpr19 = COPY [[UV19]](s32)
-  ; GFX900:   $vgpr20 = COPY [[UV20]](s32)
-  ; GFX900:   $vgpr21 = COPY [[UV21]](s32)
-  ; GFX900:   $vgpr22 = COPY [[UV22]](s32)
-  ; GFX900:   $vgpr23 = COPY [[UV23]](s32)
-  ; GFX900:   $vgpr24 = COPY [[UV24]](s32)
-  ; GFX900:   $vgpr25 = COPY [[UV25]](s32)
-  ; GFX900:   $vgpr26 = COPY [[UV26]](s32)
-  ; GFX900:   $vgpr27 = COPY [[UV27]](s32)
-  ; GFX900:   $vgpr28 = COPY [[UV28]](s32)
-  ; GFX900:   $vgpr29 = COPY [[UV29]](s32)
-  ; GFX900:   $vgpr30 = COPY [[UV30]](s32)
-  ; GFX900:   [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
-  ; GFX900:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GFX900:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
-  ; GFX900:   G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
-  ; GFX900:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX900:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
-  ; GFX900:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GFX900:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GFX900:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX900:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; GFX900:   $sgpr12 = COPY [[COPY14]](s32)
-  ; GFX900:   $sgpr13 = COPY [[COPY15]](s32)
-  ; GFX900:   $sgpr14 = COPY [[COPY16]](s32)
-  ; GFX900:   $vgpr31 = COPY [[OR1]](s32)
-  ; GFX900:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GFX900:   ADJCALLSTACKDOWN 0, 4, implicit-def $scc
-  ; GFX900:   S_ENDPGM 0
+  ; GFX900-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; GFX900-NEXT: {{  $}}
+  ; GFX900-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GFX900-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; GFX900-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; GFX900-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+  ; GFX900-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; GFX900-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GFX900-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
+  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX900-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GFX900-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX900-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GFX900-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GFX900-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX900-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GFX900-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; GFX900-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GFX900-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
+  ; GFX900-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; GFX900-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; GFX900-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; GFX900-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; GFX900-NEXT:   $vgpr4 = COPY [[UV4]](s32)
+  ; GFX900-NEXT:   $vgpr5 = COPY [[UV5]](s32)
+  ; GFX900-NEXT:   $vgpr6 = COPY [[UV6]](s32)
+  ; GFX900-NEXT:   $vgpr7 = COPY [[UV7]](s32)
+  ; GFX900-NEXT:   $vgpr8 = COPY [[UV8]](s32)
+  ; GFX900-NEXT:   $vgpr9 = COPY [[UV9]](s32)
+  ; GFX900-NEXT:   $vgpr10 = COPY [[UV10]](s32)
+  ; GFX900-NEXT:   $vgpr11 = COPY [[UV11]](s32)
+  ; GFX900-NEXT:   $vgpr12 = COPY [[UV12]](s32)
+  ; GFX900-NEXT:   $vgpr13 = COPY [[UV13]](s32)
+  ; GFX900-NEXT:   $vgpr14 = COPY [[UV14]](s32)
+  ; GFX900-NEXT:   $vgpr15 = COPY [[UV15]](s32)
+  ; GFX900-NEXT:   $vgpr16 = COPY [[UV16]](s32)
+  ; GFX900-NEXT:   $vgpr17 = COPY [[UV17]](s32)
+  ; GFX900-NEXT:   $vgpr18 = COPY [[UV18]](s32)
+  ; GFX900-NEXT:   $vgpr19 = COPY [[UV19]](s32)
+  ; GFX900-NEXT:   $vgpr20 = COPY [[UV20]](s32)
+  ; GFX900-NEXT:   $vgpr21 = COPY [[UV21]](s32)
+  ; GFX900-NEXT:   $vgpr22 = COPY [[UV22]](s32)
+  ; GFX900-NEXT:   $vgpr23 = COPY [[UV23]](s32)
+  ; GFX900-NEXT:   $vgpr24 = COPY [[UV24]](s32)
+  ; GFX900-NEXT:   $vgpr25 = COPY [[UV25]](s32)
+  ; GFX900-NEXT:   $vgpr26 = COPY [[UV26]](s32)
+  ; GFX900-NEXT:   $vgpr27 = COPY [[UV27]](s32)
+  ; GFX900-NEXT:   $vgpr28 = COPY [[UV28]](s32)
+  ; GFX900-NEXT:   $vgpr29 = COPY [[UV29]](s32)
+  ; GFX900-NEXT:   $vgpr30 = COPY [[UV30]](s32)
+  ; GFX900-NEXT:   [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+  ; GFX900-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; GFX900-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
+  ; GFX900-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
+  ; GFX900-NEXT:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GFX900-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GFX900-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GFX900-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GFX900-NEXT:   ADJCALLSTACKDOWN 0, 4, implicit-def $scc
+  ; GFX900-NEXT:   S_ENDPGM 0
   ; GFX908-LABEL: name: test_call_external_void_func_v32i32
   ; GFX908: bb.1 (%ir-block.1):
-  ; GFX908:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; GFX908:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GFX908:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GFX908:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX908:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; GFX908:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; GFX908:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX908:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GFX908:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GFX908:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX908:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; GFX908:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GFX908:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
-  ; GFX908:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; GFX908:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GFX908:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
-  ; GFX908:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GFX908:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX908:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; GFX908:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
-  ; GFX908:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; GFX908:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GFX908:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GFX908:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX908:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX908:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GFX908:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GFX908:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX908:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; GFX908:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; GFX908:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GFX908:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX908:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; GFX908:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GFX908:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
-  ; GFX908:   $vgpr0 = COPY [[UV]](s32)
-  ; GFX908:   $vgpr1 = COPY [[UV1]](s32)
-  ; GFX908:   $vgpr2 = COPY [[UV2]](s32)
-  ; GFX908:   $vgpr3 = COPY [[UV3]](s32)
-  ; GFX908:   $vgpr4 = COPY [[UV4]](s32)
-  ; GFX908:   $vgpr5 = COPY [[UV5]](s32)
-  ; GFX908:   $vgpr6 = COPY [[UV6]](s32)
-  ; GFX908:   $vgpr7 = COPY [[UV7]](s32)
-  ; GFX908:   $vgpr8 = COPY [[UV8]](s32)
-  ; GFX908:   $vgpr9 = COPY [[UV9]](s32)
-  ; GFX908:   $vgpr10 = COPY [[UV10]](s32)
-  ; GFX908:   $vgpr11 = COPY [[UV11]](s32)
-  ; GFX908:   $vgpr12 = COPY [[UV12]](s32)
-  ; GFX908:   $vgpr13 = COPY [[UV13]](s32)
-  ; GFX908:   $vgpr14 = COPY [[UV14]](s32)
-  ; GFX908:   $vgpr15 = COPY [[UV15]](s32)
-  ; GFX908:   $vgpr16 = COPY [[UV16]](s32)
-  ; GFX908:   $vgpr17 = COPY [[UV17]](s32)
-  ; GFX908:   $vgpr18 = COPY [[UV18]](s32)
-  ; GFX908:   $vgpr19 = COPY [[UV19]](s32)
-  ; GFX908:   $vgpr20 = COPY [[UV20]](s32)
-  ; GFX908:   $vgpr21 = COPY [[UV21]](s32)
-  ; GFX908:   $vgpr22 = COPY [[UV22]](s32)
-  ; GFX908:   $vgpr23 = COPY [[UV23]](s32)
-  ; GFX908:   $vgpr24 = COPY [[UV24]](s32)
-  ; GFX908:   $vgpr25 = COPY [[UV25]](s32)
-  ; GFX908:   $vgpr26 = COPY [[UV26]](s32)
-  ; GFX908:   $vgpr27 = COPY [[UV27]](s32)
-  ; GFX908:   $vgpr28 = COPY [[UV28]](s32)
-  ; GFX908:   $vgpr29 = COPY [[UV29]](s32)
-  ; GFX908:   $vgpr30 = COPY [[UV30]](s32)
-  ; GFX908:   [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
-  ; GFX908:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GFX908:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
-  ; GFX908:   G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
-  ; GFX908:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX908:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
-  ; GFX908:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GFX908:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GFX908:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX908:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; GFX908:   $sgpr12 = COPY [[COPY14]](s32)
-  ; GFX908:   $sgpr13 = COPY [[COPY15]](s32)
-  ; GFX908:   $sgpr14 = COPY [[COPY16]](s32)
-  ; GFX908:   $vgpr31 = COPY [[OR1]](s32)
-  ; GFX908:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GFX908:   ADJCALLSTACKDOWN 0, 4, implicit-def $scc
-  ; GFX908:   S_ENDPGM 0
+  ; GFX908-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; GFX908-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; GFX908-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+  ; GFX908-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; GFX908-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GFX908-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
+  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX908-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GFX908-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX908-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GFX908-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GFX908-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX908-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GFX908-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; GFX908-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GFX908-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
+  ; GFX908-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; GFX908-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; GFX908-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; GFX908-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; GFX908-NEXT:   $vgpr4 = COPY [[UV4]](s32)
+  ; GFX908-NEXT:   $vgpr5 = COPY [[UV5]](s32)
+  ; GFX908-NEXT:   $vgpr6 = COPY [[UV6]](s32)
+  ; GFX908-NEXT:   $vgpr7 = COPY [[UV7]](s32)
+  ; GFX908-NEXT:   $vgpr8 = COPY [[UV8]](s32)
+  ; GFX908-NEXT:   $vgpr9 = COPY [[UV9]](s32)
+  ; GFX908-NEXT:   $vgpr10 = COPY [[UV10]](s32)
+  ; GFX908-NEXT:   $vgpr11 = COPY [[UV11]](s32)
+  ; GFX908-NEXT:   $vgpr12 = COPY [[UV12]](s32)
+  ; GFX908-NEXT:   $vgpr13 = COPY [[UV13]](s32)
+  ; GFX908-NEXT:   $vgpr14 = COPY [[UV14]](s32)
+  ; GFX908-NEXT:   $vgpr15 = COPY [[UV15]](s32)
+  ; GFX908-NEXT:   $vgpr16 = COPY [[UV16]](s32)
+  ; GFX908-NEXT:   $vgpr17 = COPY [[UV17]](s32)
+  ; GFX908-NEXT:   $vgpr18 = COPY [[UV18]](s32)
+  ; GFX908-NEXT:   $vgpr19 = COPY [[UV19]](s32)
+  ; GFX908-NEXT:   $vgpr20 = COPY [[UV20]](s32)
+  ; GFX908-NEXT:   $vgpr21 = COPY [[UV21]](s32)
+  ; GFX908-NEXT:   $vgpr22 = COPY [[UV22]](s32)
+  ; GFX908-NEXT:   $vgpr23 = COPY [[UV23]](s32)
+  ; GFX908-NEXT:   $vgpr24 = COPY [[UV24]](s32)
+  ; GFX908-NEXT:   $vgpr25 = COPY [[UV25]](s32)
+  ; GFX908-NEXT:   $vgpr26 = COPY [[UV26]](s32)
+  ; GFX908-NEXT:   $vgpr27 = COPY [[UV27]](s32)
+  ; GFX908-NEXT:   $vgpr28 = COPY [[UV28]](s32)
+  ; GFX908-NEXT:   $vgpr29 = COPY [[UV29]](s32)
+  ; GFX908-NEXT:   $vgpr30 = COPY [[UV30]](s32)
+  ; GFX908-NEXT:   [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+  ; GFX908-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; GFX908-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
+  ; GFX908-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
+  ; GFX908-NEXT:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GFX908-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GFX908-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GFX908-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GFX908-NEXT:   ADJCALLSTACKDOWN 0, 4, implicit-def $scc
+  ; GFX908-NEXT:   S_ENDPGM 0
   call void @external_void_func_v32i32(<32 x i32> zeroinitializer)
   ret void
 }
@@ -372,254 +378,256 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
 define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
   ; GFX900-LABEL: name: test_func_call_external_void_func_v32i32
   ; GFX900: bb.1 (%ir-block.1):
-  ; GFX900:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GFX900:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GFX900:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX900:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GFX900:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GFX900:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GFX900:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX900:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GFX900:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX900:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX900:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32)
-  ; GFX900:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
-  ; GFX900:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX900:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
-  ; GFX900:   [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16)
-  ; GFX900:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GFX900:   [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
-  ; GFX900:   [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16)
-  ; GFX900:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GFX900:   [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
-  ; GFX900:   [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16)
-  ; GFX900:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GFX900:   [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
-  ; GFX900:   [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16)
-  ; GFX900:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GFX900:   [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
-  ; GFX900:   [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC10]](s16)
-  ; GFX900:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GFX900:   [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
-  ; GFX900:   [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC12]](s16)
-  ; GFX900:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; GFX900:   [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
-  ; GFX900:   [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC14]](s16)
-  ; GFX900:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; GFX900:   [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32)
-  ; GFX900:   [[TRUNC17:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC16]](s16)
-  ; GFX900:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; GFX900:   [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32)
-  ; GFX900:   [[TRUNC19:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC18]](s16)
-  ; GFX900:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; GFX900:   [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32)
-  ; GFX900:   [[TRUNC21:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC20]](s16)
-  ; GFX900:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; GFX900:   [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
-  ; GFX900:   [[TRUNC23:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC22]](s16)
-  ; GFX900:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; GFX900:   [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32)
-  ; GFX900:   [[TRUNC25:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC24]](s16)
-  ; GFX900:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; GFX900:   [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
-  ; GFX900:   [[TRUNC27:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC26]](s16)
-  ; GFX900:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; GFX900:   [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32)
-  ; GFX900:   [[TRUNC29:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC28]](s16)
-  ; GFX900:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; GFX900:   [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32)
-  ; GFX900:   [[TRUNC31:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC30]](s16)
-  ; GFX900:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; GFX900:   [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32)
-  ; GFX900:   [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16)
-  ; GFX900:   [[COPY25:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GFX900:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GFX900:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
-  ; GFX900:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GFX900:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
-  ; GFX900:   [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX900:   [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GFX900:   [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GFX900:   [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GFX900:   [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX900:   [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GFX900:   [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GFX900:   [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GFX900:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
-  ; GFX900:   $vgpr0 = COPY [[UV]](s32)
-  ; GFX900:   $vgpr1 = COPY [[UV1]](s32)
-  ; GFX900:   $vgpr2 = COPY [[UV2]](s32)
-  ; GFX900:   $vgpr3 = COPY [[UV3]](s32)
-  ; GFX900:   $vgpr4 = COPY [[UV4]](s32)
-  ; GFX900:   $vgpr5 = COPY [[UV5]](s32)
-  ; GFX900:   $vgpr6 = COPY [[UV6]](s32)
-  ; GFX900:   $vgpr7 = COPY [[UV7]](s32)
-  ; GFX900:   $vgpr8 = COPY [[UV8]](s32)
-  ; GFX900:   $vgpr9 = COPY [[UV9]](s32)
-  ; GFX900:   $vgpr10 = COPY [[UV10]](s32)
-  ; GFX900:   $vgpr11 = COPY [[UV11]](s32)
-  ; GFX900:   $vgpr12 = COPY [[UV12]](s32)
-  ; GFX900:   $vgpr13 = COPY [[UV13]](s32)
-  ; GFX900:   $vgpr14 = COPY [[UV14]](s32)
-  ; GFX900:   $vgpr15 = COPY [[UV15]](s32)
-  ; GFX900:   $vgpr16 = COPY [[UV16]](s32)
-  ; GFX900:   $vgpr17 = COPY [[UV17]](s32)
-  ; GFX900:   $vgpr18 = COPY [[UV18]](s32)
-  ; GFX900:   $vgpr19 = COPY [[UV19]](s32)
-  ; GFX900:   $vgpr20 = COPY [[UV20]](s32)
-  ; GFX900:   $vgpr21 = COPY [[UV21]](s32)
-  ; GFX900:   $vgpr22 = COPY [[UV22]](s32)
-  ; GFX900:   $vgpr23 = COPY [[UV23]](s32)
-  ; GFX900:   $vgpr24 = COPY [[UV24]](s32)
-  ; GFX900:   $vgpr25 = COPY [[UV25]](s32)
-  ; GFX900:   $vgpr26 = COPY [[UV26]](s32)
-  ; GFX900:   $vgpr27 = COPY [[UV27]](s32)
-  ; GFX900:   $vgpr28 = COPY [[UV28]](s32)
-  ; GFX900:   $vgpr29 = COPY [[UV29]](s32)
-  ; GFX900:   $vgpr30 = COPY [[UV30]](s32)
-  ; GFX900:   [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32
-  ; GFX900:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GFX900:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32)
-  ; GFX900:   G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
-  ; GFX900:   [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GFX900:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>)
-  ; GFX900:   $sgpr4_sgpr5 = COPY [[COPY26]](p4)
-  ; GFX900:   $sgpr6_sgpr7 = COPY [[COPY27]](p4)
-  ; GFX900:   $sgpr8_sgpr9 = COPY [[COPY28]](p4)
-  ; GFX900:   $sgpr10_sgpr11 = COPY [[COPY29]](s64)
-  ; GFX900:   $sgpr12 = COPY [[COPY30]](s32)
-  ; GFX900:   $sgpr13 = COPY [[COPY31]](s32)
-  ; GFX900:   $sgpr14 = COPY [[COPY32]](s32)
-  ; GFX900:   $vgpr31 = COPY [[COPY33]](s32)
-  ; GFX900:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GFX900:   ADJCALLSTACKDOWN 0, 4, implicit-def $scc
-  ; GFX900:   [[COPY36:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY25]]
-  ; GFX900:   S_SETPC_B64_return [[COPY36]]
+  ; GFX900-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; GFX900-NEXT: {{  $}}
+  ; GFX900-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; GFX900-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GFX900-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32)
+  ; GFX900-NEXT:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
+  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GFX900-NEXT:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
+  ; GFX900-NEXT:   [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16)
+  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GFX900-NEXT:   [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
+  ; GFX900-NEXT:   [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16)
+  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GFX900-NEXT:   [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
+  ; GFX900-NEXT:   [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16)
+  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GFX900-NEXT:   [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
+  ; GFX900-NEXT:   [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16)
+  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GFX900-NEXT:   [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
+  ; GFX900-NEXT:   [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC10]](s16)
+  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GFX900-NEXT:   [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
+  ; GFX900-NEXT:   [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC12]](s16)
+  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GFX900-NEXT:   [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
+  ; GFX900-NEXT:   [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC14]](s16)
+  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; GFX900-NEXT:   [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32)
+  ; GFX900-NEXT:   [[TRUNC17:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC16]](s16)
+  ; GFX900-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; GFX900-NEXT:   [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32)
+  ; GFX900-NEXT:   [[TRUNC19:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC18]](s16)
+  ; GFX900-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; GFX900-NEXT:   [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32)
+  ; GFX900-NEXT:   [[TRUNC21:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC20]](s16)
+  ; GFX900-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; GFX900-NEXT:   [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+  ; GFX900-NEXT:   [[TRUNC23:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC22]](s16)
+  ; GFX900-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; GFX900-NEXT:   [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32)
+  ; GFX900-NEXT:   [[TRUNC25:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC24]](s16)
+  ; GFX900-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; GFX900-NEXT:   [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
+  ; GFX900-NEXT:   [[TRUNC27:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC26]](s16)
+  ; GFX900-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; GFX900-NEXT:   [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32)
+  ; GFX900-NEXT:   [[TRUNC29:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC28]](s16)
+  ; GFX900-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; GFX900-NEXT:   [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32)
+  ; GFX900-NEXT:   [[TRUNC31:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC30]](s16)
+  ; GFX900-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; GFX900-NEXT:   [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32)
+  ; GFX900-NEXT:   [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16)
+  ; GFX900-NEXT:   [[COPY25:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GFX900-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; GFX900-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+  ; GFX900-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GFX900-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
+  ; GFX900-NEXT:   [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX900-NEXT:   [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GFX900-NEXT:   [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; GFX900-NEXT:   [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; GFX900-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX900-NEXT:   [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GFX900-NEXT:   [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; GFX900-NEXT:   [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX900-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
+  ; GFX900-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; GFX900-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; GFX900-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; GFX900-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; GFX900-NEXT:   $vgpr4 = COPY [[UV4]](s32)
+  ; GFX900-NEXT:   $vgpr5 = COPY [[UV5]](s32)
+  ; GFX900-NEXT:   $vgpr6 = COPY [[UV6]](s32)
+  ; GFX900-NEXT:   $vgpr7 = COPY [[UV7]](s32)
+  ; GFX900-NEXT:   $vgpr8 = COPY [[UV8]](s32)
+  ; GFX900-NEXT:   $vgpr9 = COPY [[UV9]](s32)
+  ; GFX900-NEXT:   $vgpr10 = COPY [[UV10]](s32)
+  ; GFX900-NEXT:   $vgpr11 = COPY [[UV11]](s32)
+  ; GFX900-NEXT:   $vgpr12 = COPY [[UV12]](s32)
+  ; GFX900-NEXT:   $vgpr13 = COPY [[UV13]](s32)
+  ; GFX900-NEXT:   $vgpr14 = COPY [[UV14]](s32)
+  ; GFX900-NEXT:   $vgpr15 = COPY [[UV15]](s32)
+  ; GFX900-NEXT:   $vgpr16 = COPY [[UV16]](s32)
+  ; GFX900-NEXT:   $vgpr17 = COPY [[UV17]](s32)
+  ; GFX900-NEXT:   $vgpr18 = COPY [[UV18]](s32)
+  ; GFX900-NEXT:   $vgpr19 = COPY [[UV19]](s32)
+  ; GFX900-NEXT:   $vgpr20 = COPY [[UV20]](s32)
+  ; GFX900-NEXT:   $vgpr21 = COPY [[UV21]](s32)
+  ; GFX900-NEXT:   $vgpr22 = COPY [[UV22]](s32)
+  ; GFX900-NEXT:   $vgpr23 = COPY [[UV23]](s32)
+  ; GFX900-NEXT:   $vgpr24 = COPY [[UV24]](s32)
+  ; GFX900-NEXT:   $vgpr25 = COPY [[UV25]](s32)
+  ; GFX900-NEXT:   $vgpr26 = COPY [[UV26]](s32)
+  ; GFX900-NEXT:   $vgpr27 = COPY [[UV27]](s32)
+  ; GFX900-NEXT:   $vgpr28 = COPY [[UV28]](s32)
+  ; GFX900-NEXT:   $vgpr29 = COPY [[UV29]](s32)
+  ; GFX900-NEXT:   $vgpr30 = COPY [[UV30]](s32)
+  ; GFX900-NEXT:   [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32
+  ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32)
+  ; GFX900-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
+  ; GFX900-NEXT:   [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>)
+  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY26]](p4)
+  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[COPY27]](p4)
+  ; GFX900-NEXT:   $sgpr8_sgpr9 = COPY [[COPY28]](p4)
+  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY29]](s64)
+  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY30]](s32)
+  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY31]](s32)
+  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY32]](s32)
+  ; GFX900-NEXT:   $vgpr31 = COPY [[COPY33]](s32)
+  ; GFX900-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GFX900-NEXT:   ADJCALLSTACKDOWN 0, 4, implicit-def $scc
+  ; GFX900-NEXT:   [[COPY36:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY25]]
+  ; GFX900-NEXT:   S_SETPC_B64_return [[COPY36]]
   ; GFX908-LABEL: name: test_func_call_external_void_func_v32i32
   ; GFX908: bb.1 (%ir-block.1):
-  ; GFX908:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GFX908:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GFX908:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX908:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GFX908:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GFX908:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GFX908:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GFX908:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GFX908:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX908:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX908:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32)
-  ; GFX908:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
-  ; GFX908:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX908:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
-  ; GFX908:   [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16)
-  ; GFX908:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GFX908:   [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
-  ; GFX908:   [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16)
-  ; GFX908:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GFX908:   [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
-  ; GFX908:   [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16)
-  ; GFX908:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GFX908:   [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
-  ; GFX908:   [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16)
-  ; GFX908:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GFX908:   [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
-  ; GFX908:   [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC10]](s16)
-  ; GFX908:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GFX908:   [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
-  ; GFX908:   [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC12]](s16)
-  ; GFX908:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; GFX908:   [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
-  ; GFX908:   [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC14]](s16)
-  ; GFX908:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; GFX908:   [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32)
-  ; GFX908:   [[TRUNC17:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC16]](s16)
-  ; GFX908:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; GFX908:   [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32)
-  ; GFX908:   [[TRUNC19:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC18]](s16)
-  ; GFX908:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; GFX908:   [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32)
-  ; GFX908:   [[TRUNC21:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC20]](s16)
-  ; GFX908:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; GFX908:   [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
-  ; GFX908:   [[TRUNC23:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC22]](s16)
-  ; GFX908:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; GFX908:   [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32)
-  ; GFX908:   [[TRUNC25:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC24]](s16)
-  ; GFX908:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; GFX908:   [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
-  ; GFX908:   [[TRUNC27:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC26]](s16)
-  ; GFX908:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; GFX908:   [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32)
-  ; GFX908:   [[TRUNC29:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC28]](s16)
-  ; GFX908:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; GFX908:   [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32)
-  ; GFX908:   [[TRUNC31:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC30]](s16)
-  ; GFX908:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; GFX908:   [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32)
-  ; GFX908:   [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16)
-  ; GFX908:   [[COPY25:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GFX908:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GFX908:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
-  ; GFX908:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GFX908:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
-  ; GFX908:   [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX908:   [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GFX908:   [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GFX908:   [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GFX908:   [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX908:   [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GFX908:   [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GFX908:   [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GFX908:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
-  ; GFX908:   $vgpr0 = COPY [[UV]](s32)
-  ; GFX908:   $vgpr1 = COPY [[UV1]](s32)
-  ; GFX908:   $vgpr2 = COPY [[UV2]](s32)
-  ; GFX908:   $vgpr3 = COPY [[UV3]](s32)
-  ; GFX908:   $vgpr4 = COPY [[UV4]](s32)
-  ; GFX908:   $vgpr5 = COPY [[UV5]](s32)
-  ; GFX908:   $vgpr6 = COPY [[UV6]](s32)
-  ; GFX908:   $vgpr7 = COPY [[UV7]](s32)
-  ; GFX908:   $vgpr8 = COPY [[UV8]](s32)
-  ; GFX908:   $vgpr9 = COPY [[UV9]](s32)
-  ; GFX908:   $vgpr10 = COPY [[UV10]](s32)
-  ; GFX908:   $vgpr11 = COPY [[UV11]](s32)
-  ; GFX908:   $vgpr12 = COPY [[UV12]](s32)
-  ; GFX908:   $vgpr13 = COPY [[UV13]](s32)
-  ; GFX908:   $vgpr14 = COPY [[UV14]](s32)
-  ; GFX908:   $vgpr15 = COPY [[UV15]](s32)
-  ; GFX908:   $vgpr16 = COPY [[UV16]](s32)
-  ; GFX908:   $vgpr17 = COPY [[UV17]](s32)
-  ; GFX908:   $vgpr18 = COPY [[UV18]](s32)
-  ; GFX908:   $vgpr19 = COPY [[UV19]](s32)
-  ; GFX908:   $vgpr20 = COPY [[UV20]](s32)
-  ; GFX908:   $vgpr21 = COPY [[UV21]](s32)
-  ; GFX908:   $vgpr22 = COPY [[UV22]](s32)
-  ; GFX908:   $vgpr23 = COPY [[UV23]](s32)
-  ; GFX908:   $vgpr24 = COPY [[UV24]](s32)
-  ; GFX908:   $vgpr25 = COPY [[UV25]](s32)
-  ; GFX908:   $vgpr26 = COPY [[UV26]](s32)
-  ; GFX908:   $vgpr27 = COPY [[UV27]](s32)
-  ; GFX908:   $vgpr28 = COPY [[UV28]](s32)
-  ; GFX908:   $vgpr29 = COPY [[UV29]](s32)
-  ; GFX908:   $vgpr30 = COPY [[UV30]](s32)
-  ; GFX908:   [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32
-  ; GFX908:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GFX908:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32)
-  ; GFX908:   G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
-  ; GFX908:   [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GFX908:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>)
-  ; GFX908:   $sgpr4_sgpr5 = COPY [[COPY26]](p4)
-  ; GFX908:   $sgpr6_sgpr7 = COPY [[COPY27]](p4)
-  ; GFX908:   $sgpr8_sgpr9 = COPY [[COPY28]](p4)
-  ; GFX908:   $sgpr10_sgpr11 = COPY [[COPY29]](s64)
-  ; GFX908:   $sgpr12 = COPY [[COPY30]](s32)
-  ; GFX908:   $sgpr13 = COPY [[COPY31]](s32)
-  ; GFX908:   $sgpr14 = COPY [[COPY32]](s32)
-  ; GFX908:   $vgpr31 = COPY [[COPY33]](s32)
-  ; GFX908:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GFX908:   ADJCALLSTACKDOWN 0, 4, implicit-def $scc
-  ; GFX908:   [[COPY36:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY25]]
-  ; GFX908:   S_SETPC_B64_return [[COPY36]]
+  ; GFX908-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GFX908-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32)
+  ; GFX908-NEXT:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
+  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GFX908-NEXT:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
+  ; GFX908-NEXT:   [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16)
+  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GFX908-NEXT:   [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
+  ; GFX908-NEXT:   [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16)
+  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GFX908-NEXT:   [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
+  ; GFX908-NEXT:   [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16)
+  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GFX908-NEXT:   [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
+  ; GFX908-NEXT:   [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16)
+  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GFX908-NEXT:   [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
+  ; GFX908-NEXT:   [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC10]](s16)
+  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GFX908-NEXT:   [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
+  ; GFX908-NEXT:   [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC12]](s16)
+  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GFX908-NEXT:   [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
+  ; GFX908-NEXT:   [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC14]](s16)
+  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; GFX908-NEXT:   [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32)
+  ; GFX908-NEXT:   [[TRUNC17:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC16]](s16)
+  ; GFX908-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; GFX908-NEXT:   [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32)
+  ; GFX908-NEXT:   [[TRUNC19:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC18]](s16)
+  ; GFX908-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; GFX908-NEXT:   [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32)
+  ; GFX908-NEXT:   [[TRUNC21:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC20]](s16)
+  ; GFX908-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; GFX908-NEXT:   [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+  ; GFX908-NEXT:   [[TRUNC23:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC22]](s16)
+  ; GFX908-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; GFX908-NEXT:   [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32)
+  ; GFX908-NEXT:   [[TRUNC25:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC24]](s16)
+  ; GFX908-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; GFX908-NEXT:   [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
+  ; GFX908-NEXT:   [[TRUNC27:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC26]](s16)
+  ; GFX908-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; GFX908-NEXT:   [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32)
+  ; GFX908-NEXT:   [[TRUNC29:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC28]](s16)
+  ; GFX908-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; GFX908-NEXT:   [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32)
+  ; GFX908-NEXT:   [[TRUNC31:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC30]](s16)
+  ; GFX908-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; GFX908-NEXT:   [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32)
+  ; GFX908-NEXT:   [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16)
+  ; GFX908-NEXT:   [[COPY25:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GFX908-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; GFX908-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+  ; GFX908-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GFX908-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
+  ; GFX908-NEXT:   [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX908-NEXT:   [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GFX908-NEXT:   [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; GFX908-NEXT:   [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; GFX908-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX908-NEXT:   [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GFX908-NEXT:   [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; GFX908-NEXT:   [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX908-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
+  ; GFX908-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; GFX908-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; GFX908-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; GFX908-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; GFX908-NEXT:   $vgpr4 = COPY [[UV4]](s32)
+  ; GFX908-NEXT:   $vgpr5 = COPY [[UV5]](s32)
+  ; GFX908-NEXT:   $vgpr6 = COPY [[UV6]](s32)
+  ; GFX908-NEXT:   $vgpr7 = COPY [[UV7]](s32)
+  ; GFX908-NEXT:   $vgpr8 = COPY [[UV8]](s32)
+  ; GFX908-NEXT:   $vgpr9 = COPY [[UV9]](s32)
+  ; GFX908-NEXT:   $vgpr10 = COPY [[UV10]](s32)
+  ; GFX908-NEXT:   $vgpr11 = COPY [[UV11]](s32)
+  ; GFX908-NEXT:   $vgpr12 = COPY [[UV12]](s32)
+  ; GFX908-NEXT:   $vgpr13 = COPY [[UV13]](s32)
+  ; GFX908-NEXT:   $vgpr14 = COPY [[UV14]](s32)
+  ; GFX908-NEXT:   $vgpr15 = COPY [[UV15]](s32)
+  ; GFX908-NEXT:   $vgpr16 = COPY [[UV16]](s32)
+  ; GFX908-NEXT:   $vgpr17 = COPY [[UV17]](s32)
+  ; GFX908-NEXT:   $vgpr18 = COPY [[UV18]](s32)
+  ; GFX908-NEXT:   $vgpr19 = COPY [[UV19]](s32)
+  ; GFX908-NEXT:   $vgpr20 = COPY [[UV20]](s32)
+  ; GFX908-NEXT:   $vgpr21 = COPY [[UV21]](s32)
+  ; GFX908-NEXT:   $vgpr22 = COPY [[UV22]](s32)
+  ; GFX908-NEXT:   $vgpr23 = COPY [[UV23]](s32)
+  ; GFX908-NEXT:   $vgpr24 = COPY [[UV24]](s32)
+  ; GFX908-NEXT:   $vgpr25 = COPY [[UV25]](s32)
+  ; GFX908-NEXT:   $vgpr26 = COPY [[UV26]](s32)
+  ; GFX908-NEXT:   $vgpr27 = COPY [[UV27]](s32)
+  ; GFX908-NEXT:   $vgpr28 = COPY [[UV28]](s32)
+  ; GFX908-NEXT:   $vgpr29 = COPY [[UV29]](s32)
+  ; GFX908-NEXT:   $vgpr30 = COPY [[UV30]](s32)
+  ; GFX908-NEXT:   [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32
+  ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32)
+  ; GFX908-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
+  ; GFX908-NEXT:   [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>)
+  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY26]](p4)
+  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[COPY27]](p4)
+  ; GFX908-NEXT:   $sgpr8_sgpr9 = COPY [[COPY28]](p4)
+  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY29]](s64)
+  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY30]](s32)
+  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY31]](s32)
+  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY32]](s32)
+  ; GFX908-NEXT:   $vgpr31 = COPY [[COPY33]](s32)
+  ; GFX908-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GFX908-NEXT:   ADJCALLSTACKDOWN 0, 4, implicit-def $scc
+  ; GFX908-NEXT:   [[COPY36:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY25]]
+  ; GFX908-NEXT:   S_SETPC_B64_return [[COPY36]]
   call void @external_void_func_v32i32(<32 x i32> zeroinitializer)
   ret void
 }
@@ -629,100 +637,102 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
 define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0 {
   ; GFX900-LABEL: name: test_only_workitem_id_x
   ; GFX900: bb.1 (%ir-block.0):
-  ; GFX900:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; GFX900:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GFX900:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GFX900:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX900:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; GFX900:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; GFX900:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX900:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GFX900:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GFX900:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX900:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; GFX900:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; GFX900:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GFX900:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX900:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GFX900:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX900:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; GFX900:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX900:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; GFX900:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GFX900:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GFX900:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX900:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX900:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GFX900:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GFX900:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX900:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; GFX900:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; GFX900:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GFX900:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX900:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; GFX900:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GFX900:   $vgpr0 = COPY [[C]](s32)
-  ; GFX900:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX900:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GFX900:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GFX900:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GFX900:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX900:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; GFX900:   $sgpr12 = COPY [[COPY14]](s32)
-  ; GFX900:   $sgpr13 = COPY [[COPY15]](s32)
-  ; GFX900:   $sgpr14 = COPY [[COPY16]](s32)
-  ; GFX900:   $vgpr31 = COPY [[OR1]](s32)
-  ; GFX900:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GFX900:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GFX900:   S_ENDPGM 0
+  ; GFX900-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; GFX900-NEXT: {{  $}}
+  ; GFX900-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GFX900-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; GFX900-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; GFX900-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GFX900-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX900-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GFX900-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX900-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GFX900-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GFX900-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX900-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GFX900-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; GFX900-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GFX900-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; GFX900-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GFX900-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GFX900-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GFX900-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GFX900-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GFX900-NEXT:   S_ENDPGM 0
   ; GFX908-LABEL: name: test_only_workitem_id_x
   ; GFX908: bb.1 (%ir-block.0):
-  ; GFX908:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; GFX908:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GFX908:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GFX908:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX908:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; GFX908:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; GFX908:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX908:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GFX908:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GFX908:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX908:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; GFX908:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; GFX908:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GFX908:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX908:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GFX908:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX908:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; GFX908:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX908:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; GFX908:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GFX908:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GFX908:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX908:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX908:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GFX908:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GFX908:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX908:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; GFX908:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; GFX908:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GFX908:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX908:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; GFX908:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GFX908:   $vgpr0 = COPY [[C]](s32)
-  ; GFX908:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX908:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GFX908:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GFX908:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GFX908:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX908:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; GFX908:   $sgpr12 = COPY [[COPY14]](s32)
-  ; GFX908:   $sgpr13 = COPY [[COPY15]](s32)
-  ; GFX908:   $sgpr14 = COPY [[COPY16]](s32)
-  ; GFX908:   $vgpr31 = COPY [[OR1]](s32)
-  ; GFX908:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GFX908:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GFX908:   S_ENDPGM 0
+  ; GFX908-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; GFX908-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; GFX908-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GFX908-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX908-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GFX908-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX908-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GFX908-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GFX908-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX908-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GFX908-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; GFX908-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GFX908-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; GFX908-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GFX908-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GFX908-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GFX908-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GFX908-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GFX908-NEXT:   S_ENDPGM 0
   call void @external_void_func_i32(i32 42)
   ret void
 }
@@ -730,100 +740,102 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0
 define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1 {
   ; GFX900-LABEL: name: test_only_workitem_id_y
   ; GFX900: bb.1 (%ir-block.0):
-  ; GFX900:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; GFX900:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GFX900:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GFX900:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX900:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; GFX900:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; GFX900:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX900:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GFX900:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GFX900:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX900:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; GFX900:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; GFX900:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GFX900:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX900:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GFX900:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX900:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; GFX900:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX900:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; GFX900:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GFX900:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GFX900:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX900:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX900:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GFX900:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GFX900:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX900:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; GFX900:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; GFX900:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GFX900:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX900:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; GFX900:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GFX900:   $vgpr0 = COPY [[C]](s32)
-  ; GFX900:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX900:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GFX900:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GFX900:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GFX900:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX900:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; GFX900:   $sgpr12 = COPY [[COPY14]](s32)
-  ; GFX900:   $sgpr13 = COPY [[COPY15]](s32)
-  ; GFX900:   $sgpr14 = COPY [[COPY16]](s32)
-  ; GFX900:   $vgpr31 = COPY [[OR1]](s32)
-  ; GFX900:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GFX900:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GFX900:   S_ENDPGM 0
+  ; GFX900-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; GFX900-NEXT: {{  $}}
+  ; GFX900-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GFX900-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; GFX900-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; GFX900-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GFX900-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX900-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GFX900-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX900-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GFX900-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GFX900-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX900-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GFX900-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; GFX900-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GFX900-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; GFX900-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GFX900-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GFX900-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GFX900-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GFX900-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GFX900-NEXT:   S_ENDPGM 0
   ; GFX908-LABEL: name: test_only_workitem_id_y
   ; GFX908: bb.1 (%ir-block.0):
-  ; GFX908:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; GFX908:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GFX908:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GFX908:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX908:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; GFX908:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; GFX908:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX908:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GFX908:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GFX908:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX908:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; GFX908:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; GFX908:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GFX908:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX908:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GFX908:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX908:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; GFX908:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX908:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; GFX908:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GFX908:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GFX908:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX908:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX908:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GFX908:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GFX908:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX908:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; GFX908:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; GFX908:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GFX908:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX908:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; GFX908:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GFX908:   $vgpr0 = COPY [[C]](s32)
-  ; GFX908:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX908:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GFX908:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GFX908:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GFX908:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX908:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; GFX908:   $sgpr12 = COPY [[COPY14]](s32)
-  ; GFX908:   $sgpr13 = COPY [[COPY15]](s32)
-  ; GFX908:   $sgpr14 = COPY [[COPY16]](s32)
-  ; GFX908:   $vgpr31 = COPY [[OR1]](s32)
-  ; GFX908:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GFX908:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GFX908:   S_ENDPGM 0
+  ; GFX908-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; GFX908-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; GFX908-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GFX908-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX908-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GFX908-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX908-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GFX908-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GFX908-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX908-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GFX908-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; GFX908-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GFX908-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; GFX908-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GFX908-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GFX908-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GFX908-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GFX908-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GFX908-NEXT:   S_ENDPGM 0
   call void @external_void_func_i32(i32 42)
   ret void
 }
@@ -831,100 +843,102 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1
 define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2 {
   ; GFX900-LABEL: name: test_only_workitem_id_z
   ; GFX900: bb.1 (%ir-block.0):
-  ; GFX900:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; GFX900:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GFX900:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GFX900:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX900:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; GFX900:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; GFX900:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX900:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GFX900:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GFX900:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX900:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; GFX900:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; GFX900:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GFX900:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX900:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GFX900:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX900:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; GFX900:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX900:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; GFX900:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GFX900:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GFX900:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX900:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX900:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GFX900:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GFX900:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX900:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; GFX900:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; GFX900:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GFX900:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX900:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; GFX900:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GFX900:   $vgpr0 = COPY [[C]](s32)
-  ; GFX900:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX900:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GFX900:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GFX900:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GFX900:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX900:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; GFX900:   $sgpr12 = COPY [[COPY14]](s32)
-  ; GFX900:   $sgpr13 = COPY [[COPY15]](s32)
-  ; GFX900:   $sgpr14 = COPY [[COPY16]](s32)
-  ; GFX900:   $vgpr31 = COPY [[OR1]](s32)
-  ; GFX900:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GFX900:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GFX900:   S_ENDPGM 0
+  ; GFX900-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; GFX900-NEXT: {{  $}}
+  ; GFX900-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GFX900-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; GFX900-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; GFX900-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GFX900-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX900-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GFX900-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX900-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GFX900-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GFX900-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX900-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GFX900-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; GFX900-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GFX900-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; GFX900-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GFX900-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GFX900-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GFX900-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GFX900-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GFX900-NEXT:   S_ENDPGM 0
   ; GFX908-LABEL: name: test_only_workitem_id_z
   ; GFX908: bb.1 (%ir-block.0):
-  ; GFX908:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; GFX908:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GFX908:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GFX908:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX908:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; GFX908:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; GFX908:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX908:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GFX908:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GFX908:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX908:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; GFX908:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; GFX908:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GFX908:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX908:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GFX908:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX908:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; GFX908:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX908:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; GFX908:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GFX908:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GFX908:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX908:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX908:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GFX908:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GFX908:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX908:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; GFX908:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; GFX908:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GFX908:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX908:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; GFX908:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GFX908:   $vgpr0 = COPY [[C]](s32)
-  ; GFX908:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX908:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GFX908:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GFX908:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GFX908:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX908:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; GFX908:   $sgpr12 = COPY [[COPY14]](s32)
-  ; GFX908:   $sgpr13 = COPY [[COPY15]](s32)
-  ; GFX908:   $sgpr14 = COPY [[COPY16]](s32)
-  ; GFX908:   $vgpr31 = COPY [[OR1]](s32)
-  ; GFX908:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GFX908:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GFX908:   S_ENDPGM 0
+  ; GFX908-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; GFX908-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; GFX908-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GFX908-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX908-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GFX908-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX908-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GFX908-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GFX908-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX908-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GFX908-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; GFX908-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GFX908-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; GFX908-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GFX908-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GFX908-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GFX908-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GFX908-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GFX908-NEXT:   S_ENDPGM 0
   call void @external_void_func_i32(i32 42)
   ret void
 }
@@ -932,100 +946,102 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2
 define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !3 {
   ; GFX900-LABEL: name: test_only_workitem_id_xy
   ; GFX900: bb.1 (%ir-block.0):
-  ; GFX900:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; GFX900:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GFX900:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GFX900:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX900:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; GFX900:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; GFX900:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX900:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GFX900:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GFX900:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX900:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; GFX900:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; GFX900:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GFX900:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX900:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GFX900:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX900:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; GFX900:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX900:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; GFX900:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GFX900:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GFX900:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX900:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX900:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GFX900:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GFX900:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX900:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; GFX900:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; GFX900:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GFX900:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX900:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; GFX900:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GFX900:   $vgpr0 = COPY [[C]](s32)
-  ; GFX900:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX900:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GFX900:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GFX900:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GFX900:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX900:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; GFX900:   $sgpr12 = COPY [[COPY14]](s32)
-  ; GFX900:   $sgpr13 = COPY [[COPY15]](s32)
-  ; GFX900:   $sgpr14 = COPY [[COPY16]](s32)
-  ; GFX900:   $vgpr31 = COPY [[OR1]](s32)
-  ; GFX900:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GFX900:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GFX900:   S_ENDPGM 0
+  ; GFX900-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; GFX900-NEXT: {{  $}}
+  ; GFX900-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GFX900-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; GFX900-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; GFX900-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GFX900-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX900-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GFX900-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX900-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GFX900-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GFX900-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX900-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GFX900-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; GFX900-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GFX900-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; GFX900-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GFX900-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GFX900-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GFX900-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GFX900-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GFX900-NEXT:   S_ENDPGM 0
   ; GFX908-LABEL: name: test_only_workitem_id_xy
   ; GFX908: bb.1 (%ir-block.0):
-  ; GFX908:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; GFX908:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GFX908:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GFX908:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX908:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; GFX908:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; GFX908:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX908:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GFX908:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GFX908:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX908:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; GFX908:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; GFX908:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GFX908:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX908:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GFX908:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX908:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; GFX908:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX908:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; GFX908:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GFX908:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GFX908:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX908:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX908:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GFX908:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GFX908:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX908:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; GFX908:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; GFX908:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GFX908:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX908:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; GFX908:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GFX908:   $vgpr0 = COPY [[C]](s32)
-  ; GFX908:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX908:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GFX908:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GFX908:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GFX908:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX908:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; GFX908:   $sgpr12 = COPY [[COPY14]](s32)
-  ; GFX908:   $sgpr13 = COPY [[COPY15]](s32)
-  ; GFX908:   $sgpr14 = COPY [[COPY16]](s32)
-  ; GFX908:   $vgpr31 = COPY [[OR1]](s32)
-  ; GFX908:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GFX908:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GFX908:   S_ENDPGM 0
+  ; GFX908-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; GFX908-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; GFX908-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GFX908-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX908-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GFX908-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX908-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GFX908-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GFX908-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX908-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GFX908-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; GFX908-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GFX908-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; GFX908-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GFX908-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GFX908-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GFX908-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GFX908-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GFX908-NEXT:   S_ENDPGM 0
   call void @external_void_func_i32(i32 42)
   ret void
 }
@@ -1033,100 +1049,102 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !
 define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !4 {
   ; GFX900-LABEL: name: test_only_workitem_id_yz
   ; GFX900: bb.1 (%ir-block.0):
-  ; GFX900:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; GFX900:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GFX900:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GFX900:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX900:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; GFX900:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; GFX900:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX900:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GFX900:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GFX900:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX900:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; GFX900:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; GFX900:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GFX900:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX900:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GFX900:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX900:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; GFX900:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX900:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; GFX900:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GFX900:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GFX900:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX900:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX900:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GFX900:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GFX900:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX900:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; GFX900:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; GFX900:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GFX900:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX900:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; GFX900:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GFX900:   $vgpr0 = COPY [[C]](s32)
-  ; GFX900:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX900:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GFX900:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GFX900:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GFX900:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX900:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; GFX900:   $sgpr12 = COPY [[COPY14]](s32)
-  ; GFX900:   $sgpr13 = COPY [[COPY15]](s32)
-  ; GFX900:   $sgpr14 = COPY [[COPY16]](s32)
-  ; GFX900:   $vgpr31 = COPY [[OR1]](s32)
-  ; GFX900:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GFX900:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GFX900:   S_ENDPGM 0
+  ; GFX900-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; GFX900-NEXT: {{  $}}
+  ; GFX900-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GFX900-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; GFX900-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; GFX900-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GFX900-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX900-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GFX900-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX900-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GFX900-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GFX900-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX900-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GFX900-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; GFX900-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GFX900-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; GFX900-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GFX900-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GFX900-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GFX900-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GFX900-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GFX900-NEXT:   S_ENDPGM 0
   ; GFX908-LABEL: name: test_only_workitem_id_yz
   ; GFX908: bb.1 (%ir-block.0):
-  ; GFX908:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; GFX908:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GFX908:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GFX908:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX908:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; GFX908:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; GFX908:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX908:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GFX908:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GFX908:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX908:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; GFX908:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; GFX908:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GFX908:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX908:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GFX908:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX908:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; GFX908:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX908:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; GFX908:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GFX908:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GFX908:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX908:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX908:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GFX908:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GFX908:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX908:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; GFX908:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; GFX908:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GFX908:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX908:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; GFX908:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GFX908:   $vgpr0 = COPY [[C]](s32)
-  ; GFX908:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX908:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GFX908:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GFX908:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GFX908:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX908:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; GFX908:   $sgpr12 = COPY [[COPY14]](s32)
-  ; GFX908:   $sgpr13 = COPY [[COPY15]](s32)
-  ; GFX908:   $sgpr14 = COPY [[COPY16]](s32)
-  ; GFX908:   $vgpr31 = COPY [[OR1]](s32)
-  ; GFX908:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GFX908:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GFX908:   S_ENDPGM 0
+  ; GFX908-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; GFX908-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; GFX908-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GFX908-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX908-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GFX908-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX908-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GFX908-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GFX908-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX908-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GFX908-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; GFX908-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GFX908-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; GFX908-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GFX908-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GFX908-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GFX908-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GFX908-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GFX908-NEXT:   S_ENDPGM 0
   call void @external_void_func_i32(i32 42)
   ret void
 }
@@ -1134,100 +1152,102 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !
 define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !5 {
   ; GFX900-LABEL: name: test_only_workitem_id_xz
   ; GFX900: bb.1 (%ir-block.0):
-  ; GFX900:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; GFX900:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GFX900:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GFX900:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX900:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; GFX900:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; GFX900:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX900:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GFX900:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GFX900:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX900:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; GFX900:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; GFX900:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GFX900:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX900:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GFX900:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX900:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; GFX900:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX900:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; GFX900:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GFX900:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GFX900:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX900:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX900:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GFX900:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GFX900:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX900:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; GFX900:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; GFX900:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GFX900:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX900:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; GFX900:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GFX900:   $vgpr0 = COPY [[C]](s32)
-  ; GFX900:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX900:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GFX900:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GFX900:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GFX900:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX900:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; GFX900:   $sgpr12 = COPY [[COPY14]](s32)
-  ; GFX900:   $sgpr13 = COPY [[COPY15]](s32)
-  ; GFX900:   $sgpr14 = COPY [[COPY16]](s32)
-  ; GFX900:   $vgpr31 = COPY [[OR1]](s32)
-  ; GFX900:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GFX900:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GFX900:   S_ENDPGM 0
+  ; GFX900-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; GFX900-NEXT: {{  $}}
+  ; GFX900-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GFX900-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GFX900-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GFX900-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX900-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX900-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX900-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX900-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX900-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX900-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; GFX900-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; GFX900-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GFX900-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+  ; GFX900-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GFX900-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX900-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; GFX900-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GFX900-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GFX900-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GFX900-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GFX900-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX900-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX900-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GFX900-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX900-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GFX900-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GFX900-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GFX900-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX900-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GFX900-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; GFX900-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GFX900-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; GFX900-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX900-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GFX900-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GFX900-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GFX900-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GFX900-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GFX900-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GFX900-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GFX900-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GFX900-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GFX900-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GFX900-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GFX900-NEXT:   S_ENDPGM 0
   ; GFX908-LABEL: name: test_only_workitem_id_xz
   ; GFX908: bb.1 (%ir-block.0):
-  ; GFX908:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; GFX908:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GFX908:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GFX908:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GFX908:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; GFX908:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; GFX908:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GFX908:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GFX908:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GFX908:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GFX908:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; GFX908:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; GFX908:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GFX908:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; GFX908:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GFX908:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GFX908:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; GFX908:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GFX908:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; GFX908:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GFX908:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GFX908:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GFX908:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GFX908:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GFX908:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GFX908:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GFX908:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; GFX908:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; GFX908:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GFX908:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GFX908:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; GFX908:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GFX908:   $vgpr0 = COPY [[C]](s32)
-  ; GFX908:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX908:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GFX908:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GFX908:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GFX908:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GFX908:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; GFX908:   $sgpr12 = COPY [[COPY14]](s32)
-  ; GFX908:   $sgpr13 = COPY [[COPY15]](s32)
-  ; GFX908:   $sgpr14 = COPY [[COPY16]](s32)
-  ; GFX908:   $vgpr31 = COPY [[OR1]](s32)
-  ; GFX908:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GFX908:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GFX908:   S_ENDPGM 0
+  ; GFX908-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; GFX908-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; GFX908-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GFX908-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+  ; GFX908-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GFX908-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GFX908-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; GFX908-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GFX908-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GFX908-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GFX908-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GFX908-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GFX908-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GFX908-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GFX908-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GFX908-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GFX908-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GFX908-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GFX908-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GFX908-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GFX908-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; GFX908-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GFX908-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; GFX908-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX908-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GFX908-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GFX908-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GFX908-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GFX908-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GFX908-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GFX908-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GFX908-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GFX908-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GFX908-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GFX908-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GFX908-NEXT:   S_ENDPGM 0
   call void @external_void_func_i32(i32 42)
   ret void
 }

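(A brief note on the FileCheck mechanics behind these hunks, illustrated with a minimal sketch rather than with lines taken from this diff: a plain check prefix such as "; GCN:" may match anywhere after the previous match, so stray instructions interleaved between checked lines go unnoticed, whereas "; GCN-NEXT:" requires the match on the immediately following line of output. For example:

  ; CHECK:      [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
  ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)

Here the COPY must be the very next output line after the G_CONSTANT, or FileCheck reports a failure. The added "; GCN-NEXT: {{  $}}" directives are regex matches for the visually blank, whitespace-only separator line the MIR printer emits after a liveins list, which keeps the -NEXT chain unbroken across it. These checks appear to be autogenerated; llvm/utils/update_mir_test_checks.py is the usual regeneration script for such IRTranslator tests, though that is inferred here rather than stated in the diff itself.)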
diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 2e8bfdf4e5253..16c56eaf9a380 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -70,56 +70,57 @@ declare hidden i32 @external_gfx_i32_func_i32(i32) #0
 define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %out) #0 {
   ; GCN-LABEL: name: test_call_external_i32_func_i32_imm
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; GCN:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; GCN:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.out.kernarg.offset.cast, align 16, addrspace 4)
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i32_func_i32
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   $vgpr0 = COPY [[C]](s32)
-  ; GCN:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY14]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY15]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY16]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i32_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[COPY21]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.load, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; GCN-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; GCN-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.out.kernarg.offset.cast, align 16, addrspace 4)
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i32_func_i32
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i32_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[COPY21]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.load, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call i32 @external_i32_func_i32(i32 42)
   store volatile i32 %val, i32 addrspace(1)* %out
   ret void
@@ -128,23 +129,24 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)*
 define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(i32 addrspace(1)* %out) #0 {
   ; GCN-LABEL: name: test_gfx_call_external_i32_func_i32_imm
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_i32
-  ; GCN:   $vgpr0 = COPY [[C]](s32)
-  ; GCN:   [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
-  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[COPY4]](s32), [[MV]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
-  ; GCN:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; GCN:   S_SETPC_B64_return [[COPY5]]
+  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_i32
+  ; GCN-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[COPY4]](s32), [[MV]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; GCN-NEXT:   S_SETPC_B64_return [[COPY5]]
   %val = call amdgpu_gfx i32 @external_gfx_i32_func_i32(i32 42)
   store volatile i32 %val, i32 addrspace(1)* %out
   ret void
@@ -153,53 +155,54 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(i32 addrspace(1)
 define amdgpu_kernel void @test_call_external_i1_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i1_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i1_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY19]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i1_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY19]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call i1 @external_i1_func_void()
   store volatile i1 %val, i1 addrspace(1)* undef
   ret void
@@ -208,20 +211,21 @@ define amdgpu_kernel void @test_call_external_i1_func_void() #0 {
 define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 {
   ; GCN-LABEL: name: test_gfx_call_external_i1_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i1_func_void
-  ; GCN:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
-  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; GCN:   S_SETPC_B64_return [[COPY3]]
+  ; GCN-NEXT:   liveins: $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i1_func_void
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; GCN-NEXT:   S_SETPC_B64_return [[COPY3]]
   %val = call amdgpu_gfx i1 @external_gfx_i1_func_void()
   store volatile i1 %val, i1 addrspace(1)* undef
   ret void
@@ -230,54 +234,55 @@ define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i1_zeroext_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i1_zeroext_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i1_zeroext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY19]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1)
-  ; GCN:   G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i1_zeroext_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i1_zeroext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY19]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1)
+  ; GCN-NEXT:   G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call i1 @external_i1_zeroext_func_void()
   %val.ext = zext i1 %val to i32
   store volatile i32 %val.ext, i32 addrspace(1)* undef
@@ -287,54 +292,55 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i1_signext_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i1_signext_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i1_signext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY19]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1)
-  ; GCN:   G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i1_signext_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i1_signext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY19]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1)
+  ; GCN-NEXT:   G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call i1 @external_i1_signext_func_void()
   %val.ext = sext i1 %val to i32
   store volatile i32 %val.ext, i32 addrspace(1)* undef
@@ -344,54 +350,55 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i8_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i8_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i8_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i8_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
-  ; GCN:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i8_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i8_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+  ; GCN-NEXT:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call i8 @external_i8_func_void()
   store volatile i8 %val, i8 addrspace(1)* undef
   ret void
@@ -400,21 +407,22 @@ define amdgpu_kernel void @test_call_external_i8_func_void() #0 {
 define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 {
   ; GCN-LABEL: name: test_gfx_call_external_i8_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i8_func_void
-  ; GCN:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i8_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
-  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
-  ; GCN:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; GCN:   S_SETPC_B64_return [[COPY3]]
+  ; GCN-NEXT:   liveins: $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i8_func_void
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i8_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+  ; GCN-NEXT:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; GCN-NEXT:   S_SETPC_B64_return [[COPY3]]
   %val = call amdgpu_gfx i8 @external_gfx_i8_func_void()
   store volatile i8 %val, i8 addrspace(1)* undef
   ret void
@@ -423,55 +431,56 @@ define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i8_zeroext_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i8_zeroext_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i8_zeroext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
-  ; GCN:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC1]](s8)
-  ; GCN:   G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i8_zeroext_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i8_zeroext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+  ; GCN-NEXT:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC1]](s8)
+  ; GCN-NEXT:   G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call i8 @external_i8_zeroext_func_void()
   %val.ext = zext i8 %val to i32
   store volatile i32 %val.ext, i32 addrspace(1)* undef
@@ -481,55 +490,56 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i8_signext_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i8_signext_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i8_signext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
-  ; GCN:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s8)
-  ; GCN:   G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i8_signext_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i8_signext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+  ; GCN-NEXT:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s8)
+  ; GCN-NEXT:   G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call i8 @external_i8_signext_func_void()
   %val.ext = sext i8 %val to i32
   store volatile i32 %val.ext, i32 addrspace(1)* undef
@@ -539,53 +549,54 @@ define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i16_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i16_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i16_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i16_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call i16 @external_i16_func_void()
   store volatile i16 %val, i16 addrspace(1)* undef
   ret void
@@ -594,54 +605,55 @@ define amdgpu_kernel void @test_call_external_i16_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i16_zeroext_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i16_zeroext_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i16_zeroext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
-  ; GCN:   G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i16_zeroext_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i16_zeroext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
+  ; GCN-NEXT:   G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call i16 @external_i16_zeroext_func_void()
   %val.ext = zext i16 %val to i32
   store volatile i32 %val.ext, i32 addrspace(1)* undef
@@ -651,54 +663,55 @@ define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i16_signext_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i16_signext_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i16_signext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
-  ; GCN:   G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i16_signext_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i16_signext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
+  ; GCN-NEXT:   G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call i16 @external_i16_signext_func_void()
   %val.ext = sext i16 %val to i32
   store volatile i32 %val.ext, i32 addrspace(1)* undef
@@ -708,52 +721,53 @@ define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i32_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i32_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call i32 @external_i32_func_void()
   store volatile i32 %val, i32 addrspace(1)* undef
   ret void
@@ -762,19 +776,20 @@ define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
 define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 {
   ; GCN-LABEL: name: test_gfx_call_external_i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_void
-  ; GCN:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
-  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[COPY2]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; GCN:   S_SETPC_B64_return [[COPY3]]
+  ; GCN-NEXT:   liveins: $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_void
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[COPY2]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; GCN-NEXT:   S_SETPC_B64_return [[COPY3]]
   %val = call amdgpu_gfx i32 @external_gfx_i32_func_void()
   store volatile i32 %val, i32 addrspace(1)* undef
   ret void
@@ -783,55 +798,56 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i48_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i48_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i48_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i48_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
-  ; GCN:   [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (volatile store (s48) into `i48 addrspace(1)* undef`, align 8, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i48_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i48_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (volatile store (s48) into `i48 addrspace(1)* undef`, align 8, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call i48 @external_i48_func_void()
   store volatile i48 %val, i48 addrspace(1)* undef
   ret void
@@ -840,56 +856,57 @@ define amdgpu_kernel void @test_call_external_i48_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i48_zeroext_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i48_zeroext_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i48_zeroext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
-  ; GCN:   [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
-  ; GCN:   G_STORE [[ZEXT]](s64), [[DEF]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i48_zeroext_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i48_zeroext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
+  ; GCN-NEXT:   G_STORE [[ZEXT]](s64), [[DEF]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call i48 @external_i48_zeroext_func_void()
   %ext = zext i48 %val to i64
   store volatile i64 %ext, i64 addrspace(1)* undef
@@ -899,56 +916,57 @@ define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i48_signext_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i48_signext_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i48_signext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
-  ; GCN:   [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48)
-  ; GCN:   G_STORE [[SEXT]](s64), [[DEF]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i48_signext_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i48_signext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48)
+  ; GCN-NEXT:   G_STORE [[SEXT]](s64), [[DEF]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call i48 @external_i48_signext_func_void()
   %ext = sext i48 %val to i64
   store volatile i64 %ext, i64 addrspace(1)* undef
@@ -958,54 +976,55 @@ define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i64_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i64_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i64_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i64_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call i64 @external_i64_func_void()
   store volatile i64 %val, i64 addrspace(1)* undef
   ret void
@@ -1014,54 +1033,55 @@ define amdgpu_kernel void @test_call_external_i64_func_void() #0 {
 define amdgpu_kernel void @test_call_external_p1_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_p1_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_p1_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_p1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[MV]](p1), [[DEF]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_p1_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_p1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[MV]](p1), [[DEF]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call i8 addrspace(1)* @external_p1_func_void()
   store volatile i8 addrspace(1)* %val, i8 addrspace(1)* addrspace(1)* undef
   ret void
@@ -1070,58 +1090,59 @@ define amdgpu_kernel void @test_call_external_p1_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v2p1_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v2p1_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2p1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
-  ; GCN:   [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
-  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (volatile store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v2p1_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2p1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+  ; GCN-NEXT:   [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (volatile store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call <2 x i8 addrspace(1)*> @external_v2p1_func_void()
   store volatile <2 x i8 addrspace(1)*> %val, <2 x i8 addrspace(1)*> addrspace(1)* undef
   ret void
@@ -1130,52 +1151,53 @@ define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 {
 define amdgpu_kernel void @test_call_external_p3_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_p3_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_p3_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_p3_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN:   [[COPY19:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[COPY19]](p3), [[DEF]](p3) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(3)* undef`, addrspace 3)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_p3_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_p3_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[COPY19]](p3), [[DEF]](p3) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(3)* undef`, addrspace 3)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call i8 addrspace(3)* @external_p3_func_void()
   store volatile i8 addrspace(3)* %val, i8 addrspace(3)* addrspace(3)* undef
   ret void
@@ -1184,54 +1206,55 @@ define amdgpu_kernel void @test_call_external_p3_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v2p3_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v2p3_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2p3_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN:   [[COPY19:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(p3) = COPY $vgpr1
-  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY19]](p3), [[COPY20]](p3)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p3) :: (volatile store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(3)* undef`, addrspace 3)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v2p3_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2p3_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(p3) = COPY $vgpr1
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY19]](p3), [[COPY20]](p3)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p3) :: (volatile store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(3)* undef`, addrspace 3)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call <2 x i8 addrspace(3)*> @external_v2p3_func_void()
   store volatile <2 x i8 addrspace(3)*> %val, <2 x i8 addrspace(3)*> addrspace(3)* undef
   ret void
@@ -1240,53 +1263,54 @@ define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 {
 define amdgpu_kernel void @test_call_external_f16_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_f16_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_f16_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_f16_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call half @external_f16_func_void()
   store volatile half %val, half addrspace(1)* undef
   ret void
@@ -1295,52 +1319,53 @@ define amdgpu_kernel void @test_call_external_f16_func_void() #0 {
 define amdgpu_kernel void @test_call_external_f32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_f32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_f32_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_f32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `float addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_f32_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_f32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `float addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call float @external_f32_func_void()
   store volatile float %val, float addrspace(1)* undef
   ret void
@@ -1349,54 +1374,55 @@ define amdgpu_kernel void @test_call_external_f32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_f64_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_f64_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_f64_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_f64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `double addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_f64_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_f64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `double addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call double @external_f64_func_void()
   store volatile double %val, double addrspace(1)* undef
   ret void
@@ -1405,58 +1431,59 @@ define amdgpu_kernel void @test_call_external_f64_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v2f64_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v2f64_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2f64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
-  ; GCN:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
-  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (volatile store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v2f64_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2f64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
+  ; GCN-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (volatile store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call <2 x double> @external_v2f64_func_void()
   store volatile <2 x double> %val, <2 x double> addrspace(1)* undef
   ret void
@@ -1465,54 +1492,55 @@ define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v2i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v2i32_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v2i32_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call <2 x i32> @external_v2i32_func_void()
   store volatile <2 x i32> %val, <2 x i32> addrspace(1)* undef
   ret void
@@ -1521,55 +1549,56 @@ define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v3i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v3i32_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v3i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `<3 x i32> addrspace(1)* undef`, align 8, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v3i32_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v3i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `<3 x i32> addrspace(1)* undef`, align 8, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call <3 x i32> @external_v3i32_func_void()
   store volatile <3 x i32> %val, <3 x i32> addrspace(1)* undef, align 8
   ret void
@@ -1578,56 +1607,57 @@ define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v4i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v4i32_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v4i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, align 8, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v4i32_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v4i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, align 8, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call <4 x i32> @external_v4i32_func_void()
   store volatile <4 x i32> %val, <4 x i32> addrspace(1)* undef, align 8
   ret void
@@ -1636,57 +1666,58 @@ define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v5i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v5i32_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v5i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `<5 x i32> addrspace(1)* undef`, align 8, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v5i32_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v5i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `<5 x i32> addrspace(1)* undef`, align 8, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call <5 x i32> @external_v5i32_func_void()
   store volatile <5 x i32> %val, <5 x i32> addrspace(1)* undef, align 8
   ret void
@@ -1695,60 +1726,61 @@ define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v8i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v8i32_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v8i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GCN:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, align 8, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v8i32_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v8i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GCN-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GCN-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, align 8, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call <8 x i32> @external_v8i32_func_void()
   store volatile <8 x i32> %val, <8 x i32> addrspace(1)* undef, align 8
   ret void
@@ -1757,68 +1789,69 @@ define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v16i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v16i32_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v16i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GCN:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; GCN:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; GCN:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; GCN:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; GCN:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; GCN:   [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; GCN:   [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; GCN:   [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; GCN:   [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, align 8, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v16i32_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v16i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GCN-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GCN-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GCN-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; GCN-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; GCN-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; GCN-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; GCN-NEXT:   [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; GCN-NEXT:   [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; GCN-NEXT:   [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; GCN-NEXT:   [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, align 8, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call <16 x i32> @external_v16i32_func_void()
   store volatile <16 x i32> %val, <16 x i32> addrspace(1)* undef, align 8
   ret void
@@ -1827,84 +1860,85 @@ define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v32i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v32i32_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v32i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GCN:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; GCN:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; GCN:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; GCN:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; GCN:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; GCN:   [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; GCN:   [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; GCN:   [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; GCN:   [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; GCN:   [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; GCN:   [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; GCN:   [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; GCN:   [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; GCN:   [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; GCN:   [[COPY40:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; GCN:   [[COPY41:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; GCN:   [[COPY42:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; GCN:   [[COPY43:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; GCN:   [[COPY44:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; GCN:   [[COPY45:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; GCN:   [[COPY46:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; GCN:   [[COPY47:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; GCN:   [[COPY48:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; GCN:   [[COPY49:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; GCN:   [[COPY50:%[0-9]+]]:_(s32) = COPY $vgpr31
-  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32), [[COPY37]](s32), [[COPY38]](s32), [[COPY39]](s32), [[COPY40]](s32), [[COPY41]](s32), [[COPY42]](s32), [[COPY43]](s32), [[COPY44]](s32), [[COPY45]](s32), [[COPY46]](s32), [[COPY47]](s32), [[COPY48]](s32), [[COPY49]](s32), [[COPY50]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v32i32_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v32i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GCN-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GCN-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GCN-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; GCN-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; GCN-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; GCN-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; GCN-NEXT:   [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; GCN-NEXT:   [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; GCN-NEXT:   [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; GCN-NEXT:   [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; GCN-NEXT:   [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; GCN-NEXT:   [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; GCN-NEXT:   [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; GCN-NEXT:   [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; GCN-NEXT:   [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; GCN-NEXT:   [[COPY40:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; GCN-NEXT:   [[COPY41:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; GCN-NEXT:   [[COPY42:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; GCN-NEXT:   [[COPY43:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; GCN-NEXT:   [[COPY44:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; GCN-NEXT:   [[COPY45:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; GCN-NEXT:   [[COPY46:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; GCN-NEXT:   [[COPY47:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; GCN-NEXT:   [[COPY48:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; GCN-NEXT:   [[COPY49:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; GCN-NEXT:   [[COPY50:%[0-9]+]]:_(s32) = COPY $vgpr31
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32), [[COPY37]](s32), [[COPY38]](s32), [[COPY39]](s32), [[COPY40]](s32), [[COPY41]](s32), [[COPY42]](s32), [[COPY43]](s32), [[COPY44]](s32), [[COPY45]](s32), [[COPY46]](s32), [[COPY47]](s32), [[COPY48]](s32), [[COPY49]](s32), [[COPY50]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call <32 x i32> @external_v32i32_func_void()
   store volatile <32 x i32> %val, <32 x i32> addrspace(1)* undef, align 8
   ret void
@@ -1913,52 +1947,53 @@ define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v2i16_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v2i16_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN:   [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[COPY19]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v2i16_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[COPY19]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call <2 x i16> @external_v2i16_func_void()
   store volatile <2 x i16> %val, <2 x i16> addrspace(1)* undef
   ret void
@@ -1967,56 +2002,57 @@ define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v3i16_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v3i16_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN:   [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; GCN:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-  ; GCN:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[DEF1]](<2 x s16>)
-  ; GCN:   [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v3i16_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[DEF1]](<2 x s16>)
+  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call <3 x i16> @external_v3i16_func_void()
   store volatile <3 x i16> %val, <3 x i16> addrspace(1)* undef
   ret void
@@ -2025,54 +2061,55 @@ define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v4i16_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v4i16_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v4i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN:   [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; GCN:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `<4 x i16> addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v4i16_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v4i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+  ; GCN-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `<4 x i16> addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call <4 x i16> @external_v4i16_func_void()
   store volatile <4 x i16> %val, <4 x i16> addrspace(1)* undef
   ret void
@@ -2081,52 +2118,53 @@ define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v2f16_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v2f16_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN:   [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[COPY19]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v2f16_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[COPY19]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call <2 x half> @external_v2f16_func_void()
   store volatile <2 x half> %val, <2 x half> addrspace(1)* undef
   ret void
@@ -2135,56 +2173,57 @@ define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v3f16_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v3f16_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN:   [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; GCN:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-  ; GCN:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[DEF1]](<2 x s16>)
-  ; GCN:   [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v3f16_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[DEF1]](<2 x s16>)
+  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call <3 x half> @external_v3f16_func_void()
   store volatile <3 x half> %val, <3 x half> addrspace(1)* undef
   ret void
@@ -2193,54 +2232,55 @@ define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v4f16_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v4f16_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v4f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN:   [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; GCN:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `<4 x half> addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v4f16_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v4f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+  ; GCN-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `<4 x half> addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call <4 x half> @external_v4f16_func_void()
   store volatile <4 x half> %val, <4 x half> addrspace(1)* undef
   ret void
@@ -2249,55 +2289,56 @@ define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v3f32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v3f32_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v3f32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v3f32_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v3f32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call <3 x float> @external_v3f32_func_void()
   store volatile <3 x float> %val, <3 x float> addrspace(1)* undef
   ret void
@@ -2306,57 +2347,58 @@ define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v5f32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v5f32_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v5f32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `<5 x float> addrspace(1)* undef`, align 32, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v5f32_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v5f32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `<5 x float> addrspace(1)* undef`, align 32, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call <5 x float> @external_v5f32_func_void()
   store volatile <5 x float> %val, <5 x float> addrspace(1)* undef
   ret void
@@ -2366,57 +2408,58 @@ define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i32_i64_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i32_i64_func_void
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY14]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY15]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i32_i64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[COPY20]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   G_STORE [[MV]](s64), [[COPY9]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i32_i64_func_void
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i32_i64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[COPY20]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   G_STORE [[MV]](s64), [[COPY9]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call { i32, i64 } @external_i32_i64_func_void()
   %val.0 = extractvalue { i32, i64 } %val, 0
   %val.1 = extractvalue { i32, i64 } %val, 1
@@ -2428,24 +2471,25 @@ define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 {
 define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 {
   ; GCN-LABEL: name: test_gfx_call_external_i32_i64_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i32_i64_func_void
-  ; GCN:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_i64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
-  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   G_STORE [[MV]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; GCN:   S_SETPC_B64_return [[COPY6]]
+  ; GCN-NEXT:   liveins: $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i32_i64_func_void
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_i64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   G_STORE [[MV]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; GCN-NEXT:   S_SETPC_B64_return [[COPY6]]
   %val = call amdgpu_gfx { i32, i64 } @external_gfx_i32_i64_func_void()
   %val.0 = extractvalue { i32, i64 } %val, 0
   %val.1 = extractvalue { i32, i64 } %val, 1
@@ -2457,54 +2501,55 @@ define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 {
 define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_a2i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_a2i32_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_a2i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   G_STORE [[COPY20]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_a2i32_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_a2i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   G_STORE [[COPY20]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call [2 x i32] @external_a2i32_func_void()
   %val.0 = extractvalue [2 x i32] %val, 0
   %val.1 = extractvalue [2 x i32] %val, 1
@@ -2516,70 +2561,71 @@ define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_a5i8_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_a5i8_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_a5i8_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
-  ; GCN:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32)
-  ; GCN:   [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16)
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
-  ; GCN:   [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16)
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32)
-  ; GCN:   [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16)
-  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN:   [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32)
-  ; GCN:   [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16)
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   G_STORE [[TRUNC3]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   G_STORE [[TRUNC5]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   G_STORE [[TRUNC7]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   G_STORE [[TRUNC9]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_a5i8_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_a5i8_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+  ; GCN-NEXT:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32)
+  ; GCN-NEXT:   [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
+  ; GCN-NEXT:   [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16)
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32)
+  ; GCN-NEXT:   [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16)
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN-NEXT:   [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32)
+  ; GCN-NEXT:   [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16)
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   G_STORE [[TRUNC3]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   G_STORE [[TRUNC5]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   G_STORE [[TRUNC7]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   G_STORE [[TRUNC9]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call [5 x i8] @external_a5i8_func_void()
   %val.0 = extractvalue [5 x i8] %val, 0
   %val.1 = extractvalue [5 x i8] %val, 1
@@ -2597,59 +2643,60 @@ define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v32i32_i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v32i32_i32_func_void
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   $vgpr0 = COPY [[FRAME_INDEX]](p5)
-  ; GCN:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY14]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY15]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5)
-  ; GCN:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
-  ; GCN:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32)
-  ; GCN:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5)
-  ; GCN:   G_STORE [[LOAD]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1)
-  ; GCN:   G_STORE [[LOAD1]](s32), [[COPY9]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v32i32_i32_func_void
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   $vgpr0 = COPY [[FRAME_INDEX]](p5)
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5)
+  ; GCN-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+  ; GCN-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32)
+  ; GCN-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5)
+  ; GCN-NEXT:   G_STORE [[LOAD]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1)
+  ; GCN-NEXT:   G_STORE [[LOAD1]](s32), [[COPY9]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call { <32 x i32>, i32 } @external_v32i32_i32_func_void()
   %val0 = extractvalue { <32 x i32>, i32 } %val, 0
   %val1 = extractvalue { <32 x i32>, i32 } %val, 1
@@ -2661,59 +2708,60 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_i32_v32i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i32_v32i32_func_void
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   $vgpr0 = COPY [[FRAME_INDEX]](p5)
-  ; GCN:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY14]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY15]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i32_v32i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5)
-  ; GCN:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
-  ; GCN:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32)
-  ; GCN:   [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5)
-  ; GCN:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   G_STORE [[LOAD1]](<32 x s32>), [[COPY9]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i32_v32i32_func_void
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[SHL]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   $vgpr0 = COPY [[FRAME_INDEX]](p5)
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i32_v32i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5)
+  ; GCN-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+  ; GCN-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32)
+  ; GCN-NEXT:   [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5)
+  ; GCN-NEXT:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   G_STORE [[LOAD1]](<32 x s32>), [[COPY9]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call { i32, <32 x i32> } @external_i32_v32i32_func_void()
   %val0 = extractvalue { i32, <32 x i32> } %val, 0
   %val1 = extractvalue { i32, <32 x i32> } %val, 1
@@ -2725,54 +2773,55 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 {
   ; GCN-LABEL: name: test_call_external_v33i32_func_void
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v33i32_func_void
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   $vgpr0 = COPY [[FRAME_INDEX]](p5)
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5)
-  ; GCN:   G_STORE [[LOAD]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v33i32_func_void
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   $vgpr0 = COPY [[FRAME_INDEX]](p5)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5)
+  ; GCN-NEXT:   G_STORE [[LOAD]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call <33 x i32> @external_v33i32_func_void()
   store volatile <33 x i32> %val, <33 x i32> addrspace(1)* undef, align 8
   ret void
@@ -2781,64 +2830,65 @@ define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 {
 define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) #0 {
   ; GCN-LABEL: name: test_call_external_v33i32_func_v33i32_i32
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; GCN:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.p.kernarg.offset.cast, align 16, addrspace 4)
-  ; GCN:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C]](s64)
-  ; GCN:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.idx.kernarg.offset.cast, align 8, addrspace 4)
-  ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v33i32_func_v33i32_i32
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-  ; GCN:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   $vgpr0 = COPY [[FRAME_INDEX]](p5)
-  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1)
-  ; GCN:   $vgpr1 = COPY [[UV]](s32)
-  ; GCN:   $vgpr2 = COPY [[UV1]](s32)
-  ; GCN:   $vgpr3 = COPY [[LOAD1]](s32)
-  ; GCN:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY14]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY15]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY16]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   [[LOAD2:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5)
-  ; GCN:   G_STORE [[LOAD2]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; GCN-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.p.kernarg.offset.cast, align 16, addrspace 4)
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C]](s64)
+  ; GCN-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.idx.kernarg.offset.cast, align 8, addrspace 4)
+  ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v33i32_func_v33i32_i32
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; GCN-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   $vgpr0 = COPY [[FRAME_INDEX]](p5)
+  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1)
+  ; GCN-NEXT:   $vgpr1 = COPY [[UV]](s32)
+  ; GCN-NEXT:   $vgpr2 = COPY [[UV1]](s32)
+  ; GCN-NEXT:   $vgpr3 = COPY [[LOAD1]](s32)
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[LOAD2:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5)
+  ; GCN-NEXT:   G_STORE [[LOAD2]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %val = call <33 x i32> @external_v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx)
   store volatile <33 x i32> %val, <33 x i32> addrspace(1)* undef, align 8
   ret void

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
index 433b9c4945525..56e996f6c0a76 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
@@ -6,72 +6,73 @@ declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({
 define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
   ; GCN-LABEL: name: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32
   ; GCN: bb.1 (%ir-block.1):
-  ; GCN:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; GCN:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
-  ; GCN:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; GCN:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.in.val
-  ; GCN:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.1.out.val
-  ; GCN:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32)
-  ; GCN:   G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.in.gep01, addrspace 5)
-  ; GCN:   G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.in.gep1, addrspace 5)
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32
-  ; GCN:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; GCN:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; GCN:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C3]](s64)
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]]
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C5]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN:   $vgpr0 = COPY [[FRAME_INDEX1]](p5)
-  ; GCN:   [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg
-  ; GCN:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GCN:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C6]](s32)
-  ; GCN:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; GCN:   G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.in.val, align 4, addrspace 5)
-  ; GCN:   [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY15]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY16]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY17]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GCN:   ADJCALLSTACKDOWN 0, 8, implicit-def $scc
-  ; GCN:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32)
-  ; GCN:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load (s8) from %ir.out.gep02, addrspace 5)
-  ; GCN:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (dereferenceable load (s32) from %ir.out.gep1, addrspace 5)
-  ; GCN:   G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   G_STORE [[LOAD1]](s32), [[COPY10]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.in.val
+  ; GCN-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.1.out.val
+  ; GCN-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32)
+  ; GCN-NEXT:   G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.in.gep01, addrspace 5)
+  ; GCN-NEXT:   G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.in.gep1, addrspace 5)
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; GCN-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C3]](s64)
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; GCN-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
+  ; GCN-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]]
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C5]](s32)
+  ; GCN-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; GCN-NEXT:   $vgpr0 = COPY [[FRAME_INDEX1]](p5)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg
+  ; GCN-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; GCN-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C6]](s32)
+  ; GCN-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; GCN-NEXT:   G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.in.val, align 4, addrspace 5)
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY17]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 8, implicit-def $scc
+  ; GCN-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32)
+  ; GCN-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load (s8) from %ir.out.gep02, addrspace 5)
+  ; GCN-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (dereferenceable load (s32) from %ir.out.gep1, addrspace 5)
+  ; GCN-NEXT:   G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   G_STORE [[LOAD1]](s32), [[COPY10]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; GCN-NEXT:   S_ENDPGM 0
   %in.val = alloca { i8, i32 }, align 4, addrspace(5)
   %out.val = alloca { i8, i32 }, align 4, addrspace(5)
   %in.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 0

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index 8b541f942f4b6..bd7275c361b0e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -97,50 +97,51 @@ declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8,
 define amdgpu_kernel void @test_call_external_void_func_void() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_void
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_void()
   ret void
 }
@@ -148,16 +149,17 @@ define amdgpu_kernel void @test_call_external_void_func_void() #0 {
 define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
   ; CHECK-LABEL: name: test_gfx_call_external_void_func_void
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   call amdgpu_gfx void @external_gfx_void_func_void()
   ret void
 }
@@ -165,40 +167,41 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
 define void @test_func_call_external_void_func_void() #0 {
   ; CHECK-LABEL: name: test_func_call_external_void_func_void
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY13]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY15]](s32)
-  ; CHECK:   $vgpr31 = COPY [[COPY16]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
-  ; CHECK:   S_SETPC_B64_return [[COPY18]]
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY18]]
   call void @external_void_func_void()
   ret void
 }
@@ -206,52 +209,53 @@ define void @test_func_call_external_void_func_void() #0 {
 define amdgpu_kernel void @test_call_external_void_func_empty_struct() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_empty_struct
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_empty_struct
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   $vgpr0 = COPY [[C]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_empty_struct, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_empty_struct
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_empty_struct, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_empty_struct({} zeroinitializer, i32 23)
   ret void
 }
@@ -259,52 +263,53 @@ define amdgpu_kernel void @test_call_external_void_func_empty_struct() #0 {
 define amdgpu_kernel void @test_call_external_void_func_empty_array() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_empty_array
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_empty_array
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   $vgpr0 = COPY [[C]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_empty_array, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_empty_array
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_empty_array, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_empty_array([0 x i8] zeroinitializer, i32 23)
   ret void
 }
@@ -312,53 +317,54 @@ define amdgpu_kernel void @test_call_external_void_func_empty_array() #0 {
 define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i1_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i1
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s1)
-  ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i1, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i1
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s1)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i1, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_i1(i1 true)
   ret void
 }
@@ -366,55 +372,56 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i1_signext
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile load (s1) from `i1 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i1_signext
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1)
-  ; CHECK:   $vgpr0 = COPY [[SEXT]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i1_signext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile load (s1) from `i1 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i1_signext
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[SEXT]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i1_signext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %var = load volatile i1, i1 addrspace(1)* undef
   call void @external_void_func_i1_signext(i1 signext %var)
   ret void
@@ -423,55 +430,56 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i1_zeroext
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile load (s1) from `i1 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i1_zeroext
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1)
-  ; CHECK:   $vgpr0 = COPY [[ZEXT]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i1_zeroext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile load (s1) from `i1 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i1_zeroext
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ZEXT]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i1_zeroext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %var = load volatile i1, i1 addrspace(1)* undef
   call void @external_void_func_i1_zeroext(i1 zeroext %var)
   ret void
@@ -480,55 +488,56 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i8_imm
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 123
-  ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i8
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[C]](s8)
-  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
-  ; CHECK:   $vgpr0 = COPY [[ANYEXT1]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 123
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i8
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[C]](s8)
+  ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT1]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_i8(i8 123)
   ret void
 }
@@ -536,56 +545,57 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i8_signext
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i8_signext
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[LOAD]](s8)
-  ; CHECK:   [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[SEXT]](s16)
-  ; CHECK:   $vgpr0 = COPY [[SEXT1]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i8_signext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i8_signext
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[LOAD]](s8)
+  ; CHECK-NEXT:   [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[SEXT]](s16)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[SEXT1]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i8_signext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %var = load volatile i8, i8 addrspace(1)* undef
   call void @external_void_func_i8_signext(i8 signext %var)
   ret void
@@ -594,56 +604,57 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i8_zeroext
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i8_zeroext
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[LOAD]](s8)
-  ; CHECK:   [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ZEXT]](s16)
-  ; CHECK:   $vgpr0 = COPY [[ZEXT1]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i8_zeroext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i8_zeroext
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[LOAD]](s8)
+  ; CHECK-NEXT:   [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ZEXT]](s16)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ZEXT1]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i8_zeroext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %var = load volatile i8, i8 addrspace(1)* undef
   call void @external_void_func_i8_zeroext(i8 zeroext %var)
   ret void
@@ -652,53 +663,54 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i16_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 123
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i16
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16)
-  ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 123
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i16
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_i16(i16 123)
   ret void
 }
@@ -706,55 +718,56 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i16_signext
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile load (s16) from `i16 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i16_signext
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16)
-  ; CHECK:   $vgpr0 = COPY [[SEXT]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i16_signext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile load (s16) from `i16 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i16_signext
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[SEXT]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i16_signext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %var = load volatile i16, i16 addrspace(1)* undef
   call void @external_void_func_i16_signext(i16 signext %var)
   ret void
@@ -763,55 +776,56 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i16_zeroext
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile load (s16) from `i16 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i16_zeroext
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16)
-  ; CHECK:   $vgpr0 = COPY [[ZEXT]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i16_zeroext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile load (s16) from `i16 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i16_zeroext
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ZEXT]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i16_zeroext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %var = load volatile i16, i16 addrspace(1)* undef
   call void @external_void_func_i16_zeroext(i16 zeroext %var)
   ret void
@@ -820,53 +834,54 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i32_imm
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   $vgpr0 = COPY [[C]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_i32(i32 42)
   ret void
 }
@@ -874,19 +889,20 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
 define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
   ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32
-  ; CHECK:   $vgpr0 = COPY [[C]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32
+  ; CHECK-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   call amdgpu_gfx void @external_gfx_void_func_i32(i32 42)
   ret void
 }
@@ -894,19 +910,20 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
 define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) #0 {
   ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm_inreg
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK:   liveins: $sgpr4, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg
-  ; CHECK:   $sgpr4 = COPY [[C]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $sgpr4, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr4
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg
+  ; CHECK-NEXT:   $sgpr4 = COPY [[C]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42)
   ret void
 }
@@ -914,54 +931,55 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg
 define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i64_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 123
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i64
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 123
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i64
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_i64(i64 123)
   ret void
 }
@@ -969,57 +987,58 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v2i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* null`, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i64
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* null`, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i64
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %val = load <2 x i64>, <2 x i64> addrspace(1)* null
   call void @external_void_func_v2i64(<2 x i64> %val)
   ret void
@@ -1028,58 +1047,59 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v2i64_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i64
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i64
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>)
   ret void
 }
@@ -1087,57 +1107,58 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i48
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i48
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48)
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i48, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i48
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i48, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %var = load volatile i48, i48 addrspace(1)* undef
   call void @external_void_func_i48(i48 %var)
   ret void
@@ -1146,57 +1167,58 @@ define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i48_signext
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i48_signext
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48)
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i48_signext, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i48_signext
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i48_signext, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %var = load volatile i48, i48 addrspace(1)* undef
   call void @external_void_func_i48_signext(i48 signext %var)
   ret void
@@ -1205,57 +1227,58 @@ define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_i48_zeroext
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i48_zeroext
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48)
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i48_zeroext, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i48_zeroext
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i48_zeroext, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %var = load volatile i48, i48 addrspace(1)* undef
   call void @external_void_func_i48_zeroext(i48 zeroext %var)
   ret void
@@ -1264,55 +1287,56 @@ define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_p0_imm(i8* %arg) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_p0_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.arg.kernarg.offset.cast, align 16, addrspace 4)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_p0
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p0)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_p0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.arg.kernarg.offset.cast, align 16, addrspace 4)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_p0
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p0)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_p0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_p0(i8* %arg)
   ret void
 }
@@ -1320,57 +1344,58 @@ define amdgpu_kernel void @test_call_external_void_func_p0_imm(i8* %arg) #0 {
 define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v2p0
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(<2 x p0>) = G_LOAD [[C]](p1) :: (load (<2 x p0>) from `<2 x i8*> addrspace(1)* null`, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2p0
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x p0>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2p0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x p0>) = G_LOAD [[C]](p1) :: (load (<2 x p0>) from `<2 x i8*> addrspace(1)* null`, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2p0
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x p0>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2p0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %val = load <2 x i8*>, <2 x i8*> addrspace(1)* null
   call void @external_void_func_v2p0(<2 x i8*> %val)
   ret void
@@ -1379,63 +1404,64 @@ define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v3i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593
-  ; CHECK:   [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[DEF]](s64)
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* null`, addrspace 1)
-  ; CHECK:   [[SHUF:%[0-9]+]]:_(<3 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i64
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<3 x s64>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   $vgpr4 = COPY [[UV4]](s32)
-  ; CHECK:   $vgpr5 = COPY [[UV5]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3i64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[DEF]](s64)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* null`, addrspace 1)
+  ; CHECK-NEXT:   [[SHUF:%[0-9]+]]:_(<3 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i64
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<3 x s64>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
+  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3i64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %load = load <2 x i64>, <2 x i64> addrspace(1)* null
   %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
 
@@ -1446,65 +1472,66 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v4i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593
-  ; CHECK:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C2]](s64)
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* null`, addrspace 1)
-  ; CHECK:   [[SHUF:%[0-9]+]]:_(<4 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2, 3)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i64
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<4 x s64>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   $vgpr4 = COPY [[UV4]](s32)
-  ; CHECK:   $vgpr5 = COPY [[UV5]](s32)
-  ; CHECK:   $vgpr6 = COPY [[UV6]](s32)
-  ; CHECK:   $vgpr7 = COPY [[UV7]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v4i64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C2]](s64)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* null`, addrspace 1)
+  ; CHECK-NEXT:   [[SHUF:%[0-9]+]]:_(<4 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2, 3)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i64
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<4 x s64>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
+  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
+  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
+  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v4i64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %load = load <2 x i64>, <2 x i64> addrspace(1)* null
   %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   call void @external_void_func_v4i64(<4 x i64> %val)
@@ -1514,53 +1541,54 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
 define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_f16_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4400
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_f16
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16)
-  ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_f16, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4400
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_f16
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_f16, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_f16(half 4.0)
   ret void
 }
@@ -1568,52 +1596,53 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_f32_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_f32
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   $vgpr0 = COPY [[C]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_f32
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_f32(float 4.0)
   ret void
 }
@@ -1621,56 +1650,57 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v2f32_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2f32
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2f32
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>)
   ret void
 }
@@ -1678,58 +1708,59 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v3f32_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3f32
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3f32
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 4.0>)
   ret void
 }
@@ -1737,62 +1768,63 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v5f32_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v5f32
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C5]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C6]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C7]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   $vgpr4 = COPY [[UV4]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v5f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v5f32
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C5]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C6]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C7]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v5f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
   ret void
 }
@@ -1800,54 +1832,55 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_f64_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_f64
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_f64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_f64
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_f64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_f64(double 4.0)
   ret void
 }
@@ -1855,58 +1888,59 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v2f64_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2f64
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2f64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2f64
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2f64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>)
   ret void
 }
@@ -1914,61 +1948,62 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v3f64_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00
-  ; CHECK:   [[C2:%[0-9]+]]:_(s64) = G_FCONSTANT double 8.000000e+00
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64), [[C2]](s64)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3f64
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s64>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   $vgpr4 = COPY [[UV4]](s32)
-  ; CHECK:   $vgpr5 = COPY [[UV5]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3f64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_FCONSTANT double 8.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64), [[C2]](s64)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3f64
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s64>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
+  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3f64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>)
   ret void
 }
@@ -1976,53 +2011,54 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v2i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i16
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   $vgpr0 = COPY [[LOAD]](<2 x s16>)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i16
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   $vgpr0 = COPY [[LOAD]](<2 x s16>)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %val = load <2 x i16>, <2 x i16> addrspace(1)* undef
   call void @external_void_func_v2i16(<2 x i16> %val)
   ret void
@@ -2031,57 +2067,58 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v3i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i16
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
-  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>)
-  ; CHECK:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](<2 x s16>)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](<2 x s16>)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i16
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](<2 x s16>)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %val = load <3 x i16>, <3 x i16> addrspace(1)* undef
   call void @external_void_func_v3i16(<3 x i16> %val)
   ret void
@@ -2090,57 +2127,58 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v3f16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3f16
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
-  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>)
-  ; CHECK:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](<2 x s16>)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](<2 x s16>)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3f16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3f16
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](<2 x s16>)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3f16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %val = load <3 x half>, <3 x half> addrspace(1)* undef
   call void @external_void_func_v3f16(<3 x half> %val)
   ret void
@@ -2149,55 +2187,56 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v4i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i16
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](<2 x s16>)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](<2 x s16>)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i16
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](<2 x s16>)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %val = load <4 x i16>, <4 x i16> addrspace(1)* undef
   call void @external_void_func_v4i16(<4 x i16> %val)
   ret void
@@ -2206,58 +2245,59 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v4i16_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
-  ; CHECK:   [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
-  ; CHECK:   [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 3
-  ; CHECK:   [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i16
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](<2 x s16>)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](<2 x s16>)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 3
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i16
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](<2 x s16>)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>)
   ret void
 }
@@ -2265,58 +2305,59 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v5i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[DEF]](p1) :: (load (<5 x s16>) from `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v5i16
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF
-  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[DEF1]](<5 x s16>)
-  ; CHECK:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](<2 x s16>)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](<2 x s16>)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](<2 x s16>)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v5i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[DEF]](p1) :: (load (<5 x s16>) from `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v5i16
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[DEF1]](<5 x s16>)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](<2 x s16>)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v5i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %val = load <5 x i16>, <5 x i16> addrspace(1)* undef
   call void @external_void_func_v5i16(<5 x i16> %val)
   ret void
@@ -2325,59 +2366,60 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v7i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[DEF]](p1) :: (load (<7 x s16>) from `<7 x i16> addrspace(1)* undef`, align 16, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v7i16
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[DEF1:%[0-9]+]]:_(<7 x s16>) = G_IMPLICIT_DEF
-  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[DEF1]](<7 x s16>)
-  ; CHECK:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<14 x s16>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](<2 x s16>)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](<2 x s16>)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](<2 x s16>)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](<2 x s16>)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v7i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[DEF]](p1) :: (load (<7 x s16>) from `<7 x i16> addrspace(1)* undef`, align 16, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v7i16
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(<7 x s16>) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[DEF1]](<7 x s16>)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<14 x s16>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](<2 x s16>)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v7i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %val = load <7 x i16>, <7 x i16> addrspace(1)* undef
   call void @external_void_func_v7i16(<7 x i16> %val)
   ret void
@@ -2386,90 +2428,91 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v63i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(<63 x s16>) = G_LOAD [[DEF]](p1) :: (load (<63 x s16>) from `<63 x i16> addrspace(1)* undef`, align 128, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v63i16
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[DEF1:%[0-9]+]]:_(<63 x s16>) = G_IMPLICIT_DEF
-  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<126 x s16>) = G_CONCAT_VECTORS [[LOAD]](<63 x s16>), [[DEF1]](<63 x s16>)
-  ; CHECK:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>), [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>), [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>), [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>), [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<126 x s16>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](<2 x s16>)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](<2 x s16>)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](<2 x s16>)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](<2 x s16>)
-  ; CHECK:   $vgpr4 = COPY [[UV4]](<2 x s16>)
-  ; CHECK:   $vgpr5 = COPY [[UV5]](<2 x s16>)
-  ; CHECK:   $vgpr6 = COPY [[UV6]](<2 x s16>)
-  ; CHECK:   $vgpr7 = COPY [[UV7]](<2 x s16>)
-  ; CHECK:   $vgpr8 = COPY [[UV8]](<2 x s16>)
-  ; CHECK:   $vgpr9 = COPY [[UV9]](<2 x s16>)
-  ; CHECK:   $vgpr10 = COPY [[UV10]](<2 x s16>)
-  ; CHECK:   $vgpr11 = COPY [[UV11]](<2 x s16>)
-  ; CHECK:   $vgpr12 = COPY [[UV12]](<2 x s16>)
-  ; CHECK:   $vgpr13 = COPY [[UV13]](<2 x s16>)
-  ; CHECK:   $vgpr14 = COPY [[UV14]](<2 x s16>)
-  ; CHECK:   $vgpr15 = COPY [[UV15]](<2 x s16>)
-  ; CHECK:   $vgpr16 = COPY [[UV16]](<2 x s16>)
-  ; CHECK:   $vgpr17 = COPY [[UV17]](<2 x s16>)
-  ; CHECK:   $vgpr18 = COPY [[UV18]](<2 x s16>)
-  ; CHECK:   $vgpr19 = COPY [[UV19]](<2 x s16>)
-  ; CHECK:   $vgpr20 = COPY [[UV20]](<2 x s16>)
-  ; CHECK:   $vgpr21 = COPY [[UV21]](<2 x s16>)
-  ; CHECK:   $vgpr22 = COPY [[UV22]](<2 x s16>)
-  ; CHECK:   $vgpr23 = COPY [[UV23]](<2 x s16>)
-  ; CHECK:   $vgpr24 = COPY [[UV24]](<2 x s16>)
-  ; CHECK:   $vgpr25 = COPY [[UV25]](<2 x s16>)
-  ; CHECK:   $vgpr26 = COPY [[UV26]](<2 x s16>)
-  ; CHECK:   $vgpr27 = COPY [[UV27]](<2 x s16>)
-  ; CHECK:   $vgpr28 = COPY [[UV28]](<2 x s16>)
-  ; CHECK:   $vgpr29 = COPY [[UV29]](<2 x s16>)
-  ; CHECK:   $vgpr30 = COPY [[UV30]](<2 x s16>)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
-  ; CHECK:   G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v63i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 4, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<63 x s16>) = G_LOAD [[DEF]](p1) :: (load (<63 x s16>) from `<63 x i16> addrspace(1)* undef`, align 128, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v63i16
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(<63 x s16>) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<126 x s16>) = G_CONCAT_VECTORS [[LOAD]](<63 x s16>), [[DEF1]](<63 x s16>)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>), [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>), [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>), [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>), [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<126 x s16>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](<2 x s16>)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v63i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 4, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %val = load <63 x i16>, <63 x i16> addrspace(1)* undef
   call void @external_void_func_v63i16(<63 x i16> %val)
   ret void
@@ -2478,93 +2521,94 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v65i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(<65 x s16>) = G_LOAD [[DEF]](p1) :: (load (<65 x s16>) from `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v65i16
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[DEF1:%[0-9]+]]:_(<65 x s16>) = G_IMPLICIT_DEF
-  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[LOAD]](<65 x s16>), [[DEF1]](<65 x s16>)
-  ; CHECK:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>), [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>), [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>), [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>), [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](<2 x s16>)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](<2 x s16>)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](<2 x s16>)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](<2 x s16>)
-  ; CHECK:   $vgpr4 = COPY [[UV4]](<2 x s16>)
-  ; CHECK:   $vgpr5 = COPY [[UV5]](<2 x s16>)
-  ; CHECK:   $vgpr6 = COPY [[UV6]](<2 x s16>)
-  ; CHECK:   $vgpr7 = COPY [[UV7]](<2 x s16>)
-  ; CHECK:   $vgpr8 = COPY [[UV8]](<2 x s16>)
-  ; CHECK:   $vgpr9 = COPY [[UV9]](<2 x s16>)
-  ; CHECK:   $vgpr10 = COPY [[UV10]](<2 x s16>)
-  ; CHECK:   $vgpr11 = COPY [[UV11]](<2 x s16>)
-  ; CHECK:   $vgpr12 = COPY [[UV12]](<2 x s16>)
-  ; CHECK:   $vgpr13 = COPY [[UV13]](<2 x s16>)
-  ; CHECK:   $vgpr14 = COPY [[UV14]](<2 x s16>)
-  ; CHECK:   $vgpr15 = COPY [[UV15]](<2 x s16>)
-  ; CHECK:   $vgpr16 = COPY [[UV16]](<2 x s16>)
-  ; CHECK:   $vgpr17 = COPY [[UV17]](<2 x s16>)
-  ; CHECK:   $vgpr18 = COPY [[UV18]](<2 x s16>)
-  ; CHECK:   $vgpr19 = COPY [[UV19]](<2 x s16>)
-  ; CHECK:   $vgpr20 = COPY [[UV20]](<2 x s16>)
-  ; CHECK:   $vgpr21 = COPY [[UV21]](<2 x s16>)
-  ; CHECK:   $vgpr22 = COPY [[UV22]](<2 x s16>)
-  ; CHECK:   $vgpr23 = COPY [[UV23]](<2 x s16>)
-  ; CHECK:   $vgpr24 = COPY [[UV24]](<2 x s16>)
-  ; CHECK:   $vgpr25 = COPY [[UV25]](<2 x s16>)
-  ; CHECK:   $vgpr26 = COPY [[UV26]](<2 x s16>)
-  ; CHECK:   $vgpr27 = COPY [[UV27]](<2 x s16>)
-  ; CHECK:   $vgpr28 = COPY [[UV28]](<2 x s16>)
-  ; CHECK:   $vgpr29 = COPY [[UV29]](<2 x s16>)
-  ; CHECK:   $vgpr30 = COPY [[UV30]](<2 x s16>)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
-  ; CHECK:   G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
-  ; CHECK:   G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5)
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v65i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 8, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<65 x s16>) = G_LOAD [[DEF]](p1) :: (load (<65 x s16>) from `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v65i16
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(<65 x s16>) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[LOAD]](<65 x s16>), [[DEF1]](<65 x s16>)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>), [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>), [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>), [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>), [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](<2 x s16>)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5)
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v65i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 8, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %val = load <65 x i16>, <65 x i16> addrspace(1)* undef
   call void @external_void_func_v65i16(<65 x i16> %val)
   ret void
@@ -2573,91 +2617,92 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v66i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(<66 x s16>) = G_LOAD [[DEF]](p1) :: (load (<66 x s16>) from `<66 x i16> addrspace(1)* undef`, align 256, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v66i16
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<66 x s16>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](<2 x s16>)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](<2 x s16>)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](<2 x s16>)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](<2 x s16>)
-  ; CHECK:   $vgpr4 = COPY [[UV4]](<2 x s16>)
-  ; CHECK:   $vgpr5 = COPY [[UV5]](<2 x s16>)
-  ; CHECK:   $vgpr6 = COPY [[UV6]](<2 x s16>)
-  ; CHECK:   $vgpr7 = COPY [[UV7]](<2 x s16>)
-  ; CHECK:   $vgpr8 = COPY [[UV8]](<2 x s16>)
-  ; CHECK:   $vgpr9 = COPY [[UV9]](<2 x s16>)
-  ; CHECK:   $vgpr10 = COPY [[UV10]](<2 x s16>)
-  ; CHECK:   $vgpr11 = COPY [[UV11]](<2 x s16>)
-  ; CHECK:   $vgpr12 = COPY [[UV12]](<2 x s16>)
-  ; CHECK:   $vgpr13 = COPY [[UV13]](<2 x s16>)
-  ; CHECK:   $vgpr14 = COPY [[UV14]](<2 x s16>)
-  ; CHECK:   $vgpr15 = COPY [[UV15]](<2 x s16>)
-  ; CHECK:   $vgpr16 = COPY [[UV16]](<2 x s16>)
-  ; CHECK:   $vgpr17 = COPY [[UV17]](<2 x s16>)
-  ; CHECK:   $vgpr18 = COPY [[UV18]](<2 x s16>)
-  ; CHECK:   $vgpr19 = COPY [[UV19]](<2 x s16>)
-  ; CHECK:   $vgpr20 = COPY [[UV20]](<2 x s16>)
-  ; CHECK:   $vgpr21 = COPY [[UV21]](<2 x s16>)
-  ; CHECK:   $vgpr22 = COPY [[UV22]](<2 x s16>)
-  ; CHECK:   $vgpr23 = COPY [[UV23]](<2 x s16>)
-  ; CHECK:   $vgpr24 = COPY [[UV24]](<2 x s16>)
-  ; CHECK:   $vgpr25 = COPY [[UV25]](<2 x s16>)
-  ; CHECK:   $vgpr26 = COPY [[UV26]](<2 x s16>)
-  ; CHECK:   $vgpr27 = COPY [[UV27]](<2 x s16>)
-  ; CHECK:   $vgpr28 = COPY [[UV28]](<2 x s16>)
-  ; CHECK:   $vgpr29 = COPY [[UV29]](<2 x s16>)
-  ; CHECK:   $vgpr30 = COPY [[UV30]](<2 x s16>)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
-  ; CHECK:   G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
-  ; CHECK:   G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5)
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v66i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 8, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<66 x s16>) = G_LOAD [[DEF]](p1) :: (load (<66 x s16>) from `<66 x i16> addrspace(1)* undef`, align 256, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v66i16
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<66 x s16>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](<2 x s16>)
+  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](<2 x s16>)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5)
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v66i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 8, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %val = load <66 x i16>, <66 x i16> addrspace(1)* undef
   call void @external_void_func_v66i16(<66 x i16> %val)
   ret void
@@ -2666,53 +2711,54 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v2f16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2f16
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   $vgpr0 = COPY [[LOAD]](<2 x s16>)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2f16, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2f16
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   $vgpr0 = COPY [[LOAD]](<2 x s16>)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2f16, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %val = load <2 x half>, <2 x half> addrspace(1)* undef
   call void @external_void_func_v2f16(<2 x half> %val)
   ret void
@@ -2721,55 +2767,56 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v2i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i32
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i32
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %val = load <2 x i32>, <2 x i32> addrspace(1)* undef
   call void @external_void_func_v2i32(<2 x i32> %val)
   ret void
@@ -2778,56 +2825,57 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v2i32_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i32
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i32
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C2]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>)
   ret void
 }
@@ -2835,59 +2883,60 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v3i32_imm
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32)
-  ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i32
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32)
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i32
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>)
   ret void
 }
@@ -2895,61 +2944,62 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v3i32_i32
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-  ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i32_i32
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[C3]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i32_i32
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[C3]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6)
   ret void
 }
@@ -2957,57 +3007,58 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v4i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i32
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i32
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %val = load <4 x i32>, <4 x i32> addrspace(1)* undef
   call void @external_void_func_v4i32(<4 x i32> %val)
   ret void
@@ -3016,60 +3067,61 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v4i32_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i32
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i32
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C4]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C5]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
   ret void
 }
@@ -3077,62 +3129,63 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v5i32_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v5i32
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C5]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C6]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C7]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   $vgpr4 = COPY [[UV4]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v5i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v5i32
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C5]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C6]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C7]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v5i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>)
   ret void
 }
@@ -3140,62 +3193,63 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v8i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v8i32
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   $vgpr4 = COPY [[UV4]](s32)
-  ; CHECK:   $vgpr5 = COPY [[UV5]](s32)
-  ; CHECK:   $vgpr6 = COPY [[UV6]](s32)
-  ; CHECK:   $vgpr7 = COPY [[UV7]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v8i32
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
+  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
+  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
+  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
   %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
   call void @external_void_func_v8i32(<8 x i32> %val)
@@ -3205,68 +3259,69 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v8i32_imm
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-  ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-  ; CHECK:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-  ; CHECK:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32), [[C5]](s32), [[C6]](s32), [[C7]](s32)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v8i32
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C8]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C9]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C10]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   $vgpr4 = COPY [[UV4]](s32)
-  ; CHECK:   $vgpr5 = COPY [[UV5]](s32)
-  ; CHECK:   $vgpr6 = COPY [[UV6]](s32)
-  ; CHECK:   $vgpr7 = COPY [[UV7]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32), [[C5]](s32), [[C6]](s32), [[C7]](s32)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v8i32
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C8]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C9]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C10]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
+  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
+  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
+  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
   ret void
 }
@@ -3274,70 +3329,71 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v16i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v16i32
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   $vgpr4 = COPY [[UV4]](s32)
-  ; CHECK:   $vgpr5 = COPY [[UV5]](s32)
-  ; CHECK:   $vgpr6 = COPY [[UV6]](s32)
-  ; CHECK:   $vgpr7 = COPY [[UV7]](s32)
-  ; CHECK:   $vgpr8 = COPY [[UV8]](s32)
-  ; CHECK:   $vgpr9 = COPY [[UV9]](s32)
-  ; CHECK:   $vgpr10 = COPY [[UV10]](s32)
-  ; CHECK:   $vgpr11 = COPY [[UV11]](s32)
-  ; CHECK:   $vgpr12 = COPY [[UV12]](s32)
-  ; CHECK:   $vgpr13 = COPY [[UV13]](s32)
-  ; CHECK:   $vgpr14 = COPY [[UV14]](s32)
-  ; CHECK:   $vgpr15 = COPY [[UV15]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v16i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v16i32
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
+  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
+  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
+  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
+  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](s32)
+  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](s32)
+  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](s32)
+  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](s32)
+  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](s32)
+  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](s32)
+  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](s32)
+  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v16i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
   %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
   call void @external_void_func_v16i32(<16 x i32> %val)
@@ -3347,89 +3403,90 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v32i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   $vgpr4 = COPY [[UV4]](s32)
-  ; CHECK:   $vgpr5 = COPY [[UV5]](s32)
-  ; CHECK:   $vgpr6 = COPY [[UV6]](s32)
-  ; CHECK:   $vgpr7 = COPY [[UV7]](s32)
-  ; CHECK:   $vgpr8 = COPY [[UV8]](s32)
-  ; CHECK:   $vgpr9 = COPY [[UV9]](s32)
-  ; CHECK:   $vgpr10 = COPY [[UV10]](s32)
-  ; CHECK:   $vgpr11 = COPY [[UV11]](s32)
-  ; CHECK:   $vgpr12 = COPY [[UV12]](s32)
-  ; CHECK:   $vgpr13 = COPY [[UV13]](s32)
-  ; CHECK:   $vgpr14 = COPY [[UV14]](s32)
-  ; CHECK:   $vgpr15 = COPY [[UV15]](s32)
-  ; CHECK:   $vgpr16 = COPY [[UV16]](s32)
-  ; CHECK:   $vgpr17 = COPY [[UV17]](s32)
-  ; CHECK:   $vgpr18 = COPY [[UV18]](s32)
-  ; CHECK:   $vgpr19 = COPY [[UV19]](s32)
-  ; CHECK:   $vgpr20 = COPY [[UV20]](s32)
-  ; CHECK:   $vgpr21 = COPY [[UV21]](s32)
-  ; CHECK:   $vgpr22 = COPY [[UV22]](s32)
-  ; CHECK:   $vgpr23 = COPY [[UV23]](s32)
-  ; CHECK:   $vgpr24 = COPY [[UV24]](s32)
-  ; CHECK:   $vgpr25 = COPY [[UV25]](s32)
-  ; CHECK:   $vgpr26 = COPY [[UV26]](s32)
-  ; CHECK:   $vgpr27 = COPY [[UV27]](s32)
-  ; CHECK:   $vgpr28 = COPY [[UV28]](s32)
-  ; CHECK:   $vgpr29 = COPY [[UV29]](s32)
-  ; CHECK:   $vgpr30 = COPY [[UV30]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
-  ; CHECK:   G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 4, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
+  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
+  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
+  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
+  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](s32)
+  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](s32)
+  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](s32)
+  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](s32)
+  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](s32)
+  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](s32)
+  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](s32)
+  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](s32)
+  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](s32)
+  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](s32)
+  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](s32)
+  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](s32)
+  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](s32)
+  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](s32)
+  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](s32)
+  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](s32)
+  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](s32)
+  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](s32)
+  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](s32)
+  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](s32)
+  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
+  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
+  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 4, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
   %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
   call void @external_void_func_v32i32(<32 x i32> %val)
@@ -3439,95 +3496,96 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v32i32_i32
   ; CHECK: bb.1 (%ir-block.1):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr0, addrspace 1)
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i32
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   $vgpr4 = COPY [[UV4]](s32)
-  ; CHECK:   $vgpr5 = COPY [[UV5]](s32)
-  ; CHECK:   $vgpr6 = COPY [[UV6]](s32)
-  ; CHECK:   $vgpr7 = COPY [[UV7]](s32)
-  ; CHECK:   $vgpr8 = COPY [[UV8]](s32)
-  ; CHECK:   $vgpr9 = COPY [[UV9]](s32)
-  ; CHECK:   $vgpr10 = COPY [[UV10]](s32)
-  ; CHECK:   $vgpr11 = COPY [[UV11]](s32)
-  ; CHECK:   $vgpr12 = COPY [[UV12]](s32)
-  ; CHECK:   $vgpr13 = COPY [[UV13]](s32)
-  ; CHECK:   $vgpr14 = COPY [[UV14]](s32)
-  ; CHECK:   $vgpr15 = COPY [[UV15]](s32)
-  ; CHECK:   $vgpr16 = COPY [[UV16]](s32)
-  ; CHECK:   $vgpr17 = COPY [[UV17]](s32)
-  ; CHECK:   $vgpr18 = COPY [[UV18]](s32)
-  ; CHECK:   $vgpr19 = COPY [[UV19]](s32)
-  ; CHECK:   $vgpr20 = COPY [[UV20]](s32)
-  ; CHECK:   $vgpr21 = COPY [[UV21]](s32)
-  ; CHECK:   $vgpr22 = COPY [[UV22]](s32)
-  ; CHECK:   $vgpr23 = COPY [[UV23]](s32)
-  ; CHECK:   $vgpr24 = COPY [[UV24]](s32)
-  ; CHECK:   $vgpr25 = COPY [[UV25]](s32)
-  ; CHECK:   $vgpr26 = COPY [[UV26]](s32)
-  ; CHECK:   $vgpr27 = COPY [[UV27]](s32)
-  ; CHECK:   $vgpr28 = COPY [[UV28]](s32)
-  ; CHECK:   $vgpr29 = COPY [[UV29]](s32)
-  ; CHECK:   $vgpr30 = COPY [[UV30]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
-  ; CHECK:   G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
-  ; CHECK:   G_STORE [[LOAD2]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 4, addrspace 5)
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 8, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr0, addrspace 1)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i32
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
+  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
+  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
+  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
+  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](s32)
+  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](s32)
+  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](s32)
+  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](s32)
+  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](s32)
+  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](s32)
+  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](s32)
+  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](s32)
+  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](s32)
+  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](s32)
+  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](s32)
+  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](s32)
+  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](s32)
+  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](s32)
+  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](s32)
+  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](s32)
+  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](s32)
+  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](s32)
+  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](s32)
+  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](s32)
+  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
+  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
+  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
+  ; CHECK-NEXT:   G_STORE [[LOAD2]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 4, addrspace 5)
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 8, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
   %val0 = load <32 x i32>, <32 x i32> addrspace(1)* %ptr0
   %val1 = load i32, i32 addrspace(1)* undef
@@ -3538,104 +3596,105 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
 define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v32i32_i8_i8_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF1]](p1)
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr0, addrspace 1)
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[DEF1]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY10]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i8_i8_i16
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64)
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]]
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   $vgpr4 = COPY [[UV4]](s32)
-  ; CHECK:   $vgpr5 = COPY [[UV5]](s32)
-  ; CHECK:   $vgpr6 = COPY [[UV6]](s32)
-  ; CHECK:   $vgpr7 = COPY [[UV7]](s32)
-  ; CHECK:   $vgpr8 = COPY [[UV8]](s32)
-  ; CHECK:   $vgpr9 = COPY [[UV9]](s32)
-  ; CHECK:   $vgpr10 = COPY [[UV10]](s32)
-  ; CHECK:   $vgpr11 = COPY [[UV11]](s32)
-  ; CHECK:   $vgpr12 = COPY [[UV12]](s32)
-  ; CHECK:   $vgpr13 = COPY [[UV13]](s32)
-  ; CHECK:   $vgpr14 = COPY [[UV14]](s32)
-  ; CHECK:   $vgpr15 = COPY [[UV15]](s32)
-  ; CHECK:   $vgpr16 = COPY [[UV16]](s32)
-  ; CHECK:   $vgpr17 = COPY [[UV17]](s32)
-  ; CHECK:   $vgpr18 = COPY [[UV18]](s32)
-  ; CHECK:   $vgpr19 = COPY [[UV19]](s32)
-  ; CHECK:   $vgpr20 = COPY [[UV20]](s32)
-  ; CHECK:   $vgpr21 = COPY [[UV21]](s32)
-  ; CHECK:   $vgpr22 = COPY [[UV22]](s32)
-  ; CHECK:   $vgpr23 = COPY [[UV23]](s32)
-  ; CHECK:   $vgpr24 = COPY [[UV24]](s32)
-  ; CHECK:   $vgpr25 = COPY [[UV25]](s32)
-  ; CHECK:   $vgpr26 = COPY [[UV26]](s32)
-  ; CHECK:   $vgpr27 = COPY [[UV27]](s32)
-  ; CHECK:   $vgpr28 = COPY [[UV28]](s32)
-  ; CHECK:   $vgpr29 = COPY [[UV29]](s32)
-  ; CHECK:   $vgpr30 = COPY [[UV30]](s32)
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C3]](s32)
-  ; CHECK:   G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD2]](s8)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C4]](s32)
-  ; CHECK:   G_STORE [[ANYEXT]](s16), [[PTR_ADD2]](p5) :: (store (s16) into stack + 4, align 4, addrspace 5)
-  ; CHECK:   [[COPY22:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16)
-  ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; CHECK:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C5]](s32)
-  ; CHECK:   G_STORE [[COPY22]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5)
-  ; CHECK:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-  ; CHECK:   [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C6]](s32)
-  ; CHECK:   G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5)
-  ; CHECK:   [[COPY23:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY23]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY16]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY17]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32_i8_i8_i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 16, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF1]](p1)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr0, addrspace 1)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[DEF1]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY10]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i8_i8_i16
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
+  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
+  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
+  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
+  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](s32)
+  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](s32)
+  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](s32)
+  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](s32)
+  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](s32)
+  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](s32)
+  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](s32)
+  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](s32)
+  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](s32)
+  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](s32)
+  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](s32)
+  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](s32)
+  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](s32)
+  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](s32)
+  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](s32)
+  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](s32)
+  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](s32)
+  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](s32)
+  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](s32)
+  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](s32)
+  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
+  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
+  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C3]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD2]](s8)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C4]](s32)
+  ; CHECK-NEXT:   G_STORE [[ANYEXT]](s16), [[PTR_ADD2]](p5) :: (store (s16) into stack + 4, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16)
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C5]](s32)
+  ; CHECK-NEXT:   G_STORE [[COPY22]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+  ; CHECK-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C6]](s32)
+  ; CHECK-NEXT:   G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[COPY23:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY23]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY17]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32_i8_i8_i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 16, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
   %val0 = load <32 x i32>, <32 x i32> addrspace(1)* %ptr0
   %val1 = load i8, i8 addrspace(1)* undef
@@ -3648,99 +3707,100 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v32i32_p3_p5
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF1]](p1)
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr0, addrspace 1)
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(p3) = G_LOAD [[DEF1]](p1) :: (load (p3) from `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[LOAD3:%[0-9]+]]:_(p5) = G_LOAD [[COPY10]](p1) :: (load (p5) from `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_p3_p5
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64)
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]]
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   $vgpr4 = COPY [[UV4]](s32)
-  ; CHECK:   $vgpr5 = COPY [[UV5]](s32)
-  ; CHECK:   $vgpr6 = COPY [[UV6]](s32)
-  ; CHECK:   $vgpr7 = COPY [[UV7]](s32)
-  ; CHECK:   $vgpr8 = COPY [[UV8]](s32)
-  ; CHECK:   $vgpr9 = COPY [[UV9]](s32)
-  ; CHECK:   $vgpr10 = COPY [[UV10]](s32)
-  ; CHECK:   $vgpr11 = COPY [[UV11]](s32)
-  ; CHECK:   $vgpr12 = COPY [[UV12]](s32)
-  ; CHECK:   $vgpr13 = COPY [[UV13]](s32)
-  ; CHECK:   $vgpr14 = COPY [[UV14]](s32)
-  ; CHECK:   $vgpr15 = COPY [[UV15]](s32)
-  ; CHECK:   $vgpr16 = COPY [[UV16]](s32)
-  ; CHECK:   $vgpr17 = COPY [[UV17]](s32)
-  ; CHECK:   $vgpr18 = COPY [[UV18]](s32)
-  ; CHECK:   $vgpr19 = COPY [[UV19]](s32)
-  ; CHECK:   $vgpr20 = COPY [[UV20]](s32)
-  ; CHECK:   $vgpr21 = COPY [[UV21]](s32)
-  ; CHECK:   $vgpr22 = COPY [[UV22]](s32)
-  ; CHECK:   $vgpr23 = COPY [[UV23]](s32)
-  ; CHECK:   $vgpr24 = COPY [[UV24]](s32)
-  ; CHECK:   $vgpr25 = COPY [[UV25]](s32)
-  ; CHECK:   $vgpr26 = COPY [[UV26]](s32)
-  ; CHECK:   $vgpr27 = COPY [[UV27]](s32)
-  ; CHECK:   $vgpr28 = COPY [[UV28]](s32)
-  ; CHECK:   $vgpr29 = COPY [[UV29]](s32)
-  ; CHECK:   $vgpr30 = COPY [[UV30]](s32)
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C3]](s32)
-  ; CHECK:   G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C4]](s32)
-  ; CHECK:   G_STORE [[LOAD2]](p3), [[PTR_ADD2]](p5) :: (store (p3) into stack + 4, addrspace 5)
-  ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; CHECK:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C5]](s32)
-  ; CHECK:   G_STORE [[LOAD3]](p5), [[PTR_ADD3]](p5) :: (store (p5) into stack + 8, align 8, addrspace 5)
-  ; CHECK:   [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY16]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY17]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32_p3_p5, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 12, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF1]](p1)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr0, addrspace 1)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(p3) = G_LOAD [[DEF1]](p1) :: (load (p3) from `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(p5) = G_LOAD [[COPY10]](p1) :: (load (p5) from `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_p3_p5
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
+  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
+  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
+  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
+  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](s32)
+  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](s32)
+  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](s32)
+  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](s32)
+  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](s32)
+  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](s32)
+  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](s32)
+  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](s32)
+  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](s32)
+  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](s32)
+  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](s32)
+  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](s32)
+  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](s32)
+  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](s32)
+  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](s32)
+  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](s32)
+  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](s32)
+  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](s32)
+  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](s32)
+  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](s32)
+  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
+  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
+  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C3]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C4]](s32)
+  ; CHECK-NEXT:   G_STORE [[LOAD2]](p3), [[PTR_ADD2]](p5) :: (store (p3) into stack + 4, addrspace 5)
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C5]](s32)
+  ; CHECK-NEXT:   G_STORE [[LOAD3]](p5), [[PTR_ADD3]](p5) :: (store (p5) into stack + 8, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY17]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32_p3_p5, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 12, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
   %val0 = load <32 x i32>, <32 x i32> addrspace(1)* %ptr0
   %val1 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* undef
@@ -3752,60 +3812,61 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 {
 define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_struct_i8_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_struct_i8_i32
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8)
-  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
-  ; CHECK:   $vgpr0 = COPY [[ANYEXT1]](s32)
-  ; CHECK:   $vgpr1 = COPY [[LOAD2]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_struct_i8_i32
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8)
+  ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT1]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[LOAD2]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
   %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
   call void @external_void_func_struct_i8_i32({ i8, i32 } %val)
@@ -3815,26 +3876,27 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
 define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
   ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8)
-  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
-  ; CHECK:   $vgpr0 = COPY [[ANYEXT1]](s32)
-  ; CHECK:   $vgpr1 = COPY [[LOAD2]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8)
+  ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT1]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[LOAD2]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
   %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
   call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val)
@@ -3844,26 +3906,27 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
 define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #0 {
   ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32_inreg
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8)
-  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
-  ; CHECK:   $sgpr4 = COPY [[ANYEXT1]](s32)
-  ; CHECK:   $sgpr5 = COPY [[LOAD2]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8)
+  ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
+  ; CHECK-NEXT:   $sgpr4 = COPY [[ANYEXT1]](s32)
+  ; CHECK-NEXT:   $sgpr5 = COPY [[LOAD2]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
   %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
   call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val)
@@ -3873,62 +3936,63 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #
 define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_byval_struct_i8_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.val
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32)
-  ; CHECK:   G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.gep01, addrspace 5)
-  ; CHECK:   G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_byval_struct_i8_i32
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
-  ; CHECK:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C6]](s32)
-  ; CHECK:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; CHECK:   G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.val, align 4, addrspace 5)
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_byval_struct_i8_i32, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 8, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.val
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32)
+  ; CHECK-NEXT:   G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.gep01, addrspace 5)
+  ; CHECK-NEXT:   G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_byval_struct_i8_i32
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C6]](s32)
+  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; CHECK-NEXT:   G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.val, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_byval_struct_i8_i32, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 8, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %val = alloca { i8, i32 }, align 4, addrspace(5)
   %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 0
   %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 1
@@ -3943,53 +4007,54 @@ declare void @void_func_byval_a3i32_byval_i8_align32([3 x i32] addrspace(5)* byv
 define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming0, i8 addrspace(5)* align 32 %incoming1) #0 {
   ; CHECK-LABEL: name: call_byval_3ai32_byval_i8_align32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(p5) = COPY $vgpr0
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr1
-  ; CHECK:   [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 999
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_func_byval_a3i32_byval_i8_align32
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(p5) = COPY $sgpr32
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C1]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-  ; CHECK:   G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C2]](s32), 0 :: (dereferenceable store (s96) into stack, align 4, addrspace 5), (dereferenceable load (s96) from %ir.incoming0, align 4, addrspace 5)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C3]](s32)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-  ; CHECK:   G_MEMCPY [[PTR_ADD1]](p5), [[COPY9]](p5), [[C4]](s32), 0 :: (dereferenceable store (s8) into stack + 32, align 32, addrspace 5), (dereferenceable load (s8) from %ir.incoming1, align 32, addrspace 5)
-  ; CHECK:   $vgpr0 = COPY [[C]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[COPY13]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY16]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY17]](s32)
-  ; CHECK:   $vgpr31 = COPY [[COPY18]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @void_func_byval_a3i32_byval_i8_align32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 36, implicit-def $scc
-  ; CHECK:   [[COPY21:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]]
-  ; CHECK:   S_SETPC_B64_return [[COPY21]]
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p5) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 999
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_func_byval_a3i32_byval_i8_align32
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(p5) = COPY $sgpr32
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C1]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+  ; CHECK-NEXT:   G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C2]](s32), 0 :: (dereferenceable store (s96) into stack, align 4, addrspace 5), (dereferenceable load (s96) from %ir.incoming0, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C3]](s32)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   G_MEMCPY [[PTR_ADD1]](p5), [[COPY9]](p5), [[C4]](s32), 0 :: (dereferenceable store (s8) into stack + 32, align 32, addrspace 5), (dereferenceable load (s8) from %ir.incoming1, align 32, addrspace 5)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[C]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY13]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY17]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY18]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @void_func_byval_a3i32_byval_i8_align32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 36, implicit-def $scc
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY21]]
   call void @void_func_byval_a3i32_byval_i8_align32([3 x i32] addrspace(5)* byval([3 x i32]) %incoming0, i8 addrspace(5)* align 32 %incoming1, i32 999)
   ret void
 }
@@ -4001,46 +4066,47 @@ declare void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i6
 define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)* align 256 %incoming_high_align) #0 {
   ; CHECK-LABEL: name: call_byval_a4i64_align4_higher_source_align
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(p5) = COPY $vgpr0
-  ; CHECK:   [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_func_byval_a4i64_align4
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(p5) = COPY $sgpr32
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY18]], [[C]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-  ; CHECK:   G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C1]](s32), 0 :: (dereferenceable store (s256) into stack, align 4, addrspace 5), (dereferenceable load (s256) from %ir.incoming_high_align, align 256, addrspace 5)
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[COPY12]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[COPY17]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 32, implicit-def $scc
-  ; CHECK:   [[COPY20:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
-  ; CHECK:   S_SETPC_B64_return [[COPY20]]
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p5) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_func_byval_a4i64_align4
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(p5) = COPY $sgpr32
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY18]], [[C]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+  ; CHECK-NEXT:   G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C1]](s32), 0 :: (dereferenceable store (s256) into stack, align 4, addrspace 5), (dereferenceable load (s256) from %ir.incoming_high_align, align 256, addrspace 5)
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY12]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY17]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 32, implicit-def $scc
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY20]]
   call void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i64]) align 4 %incoming_high_align)
   ret void
 }
@@ -4048,60 +4114,61 @@ define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)*
 define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v2i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<2 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<2 x s8>) from %ir.ptr, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i8
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<2 x s8>)
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
-  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8)
-  ; CHECK:   [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
-  ; CHECK:   $vgpr0 = COPY [[ANYEXT2]](s32)
-  ; CHECK:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16)
-  ; CHECK:   $vgpr1 = COPY [[ANYEXT3]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<2 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<2 x s8>) from %ir.ptr, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i8
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<2 x s8>)
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
+  ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8)
+  ; CHECK-NEXT:   [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT2]](s32)
+  ; CHECK-NEXT:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[ANYEXT3]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %ptr = load <2 x i8> addrspace(1)*, <2 x i8> addrspace(1)* addrspace(4)* undef
   %val = load <2 x i8>, <2 x i8> addrspace(1)* %ptr
   call void @external_void_func_v2i8(<2 x i8> %val)
@@ -4111,63 +4178,64 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v3i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<3 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s8>) from %ir.ptr, align 4, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i8
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<3 x s8>)
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
-  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8)
-  ; CHECK:   [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8)
-  ; CHECK:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
-  ; CHECK:   $vgpr0 = COPY [[ANYEXT3]](s32)
-  ; CHECK:   [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16)
-  ; CHECK:   $vgpr1 = COPY [[ANYEXT4]](s32)
-  ; CHECK:   [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16)
-  ; CHECK:   $vgpr2 = COPY [[ANYEXT5]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<3 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s8>) from %ir.ptr, align 4, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i8
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<3 x s8>)
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
+  ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8)
+  ; CHECK-NEXT:   [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8)
+  ; CHECK-NEXT:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT3]](s32)
+  ; CHECK-NEXT:   [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[ANYEXT4]](s32)
+  ; CHECK-NEXT:   [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[ANYEXT5]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %ptr = load <3 x i8> addrspace(1)*, <3 x i8> addrspace(1)* addrspace(4)* undef
   %val = load <3 x i8>, <3 x i8> addrspace(1)* %ptr
   call void @external_void_func_v3i8(<3 x i8> %val)
@@ -4177,66 +4245,67 @@ define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v4i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i8
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>)
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
-  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8)
-  ; CHECK:   [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8)
-  ; CHECK:   [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8)
-  ; CHECK:   [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
-  ; CHECK:   $vgpr0 = COPY [[ANYEXT4]](s32)
-  ; CHECK:   [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16)
-  ; CHECK:   $vgpr1 = COPY [[ANYEXT5]](s32)
-  ; CHECK:   [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16)
-  ; CHECK:   $vgpr2 = COPY [[ANYEXT6]](s32)
-  ; CHECK:   [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16)
-  ; CHECK:   $vgpr3 = COPY [[ANYEXT7]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v4i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i8
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>)
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
+  ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8)
+  ; CHECK-NEXT:   [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8)
+  ; CHECK-NEXT:   [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8)
+  ; CHECK-NEXT:   [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT4]](s32)
+  ; CHECK-NEXT:   [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[ANYEXT5]](s32)
+  ; CHECK-NEXT:   [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[ANYEXT6]](s32)
+  ; CHECK-NEXT:   [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[ANYEXT7]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v4i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %ptr = load <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef
   %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
   call void @external_void_func_v4i8(<4 x i8> %val)
@@ -4246,78 +4315,79 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v8i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<8 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s8>) from %ir.ptr, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v8i8
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<8 x s8>)
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
-  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8)
-  ; CHECK:   [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8)
-  ; CHECK:   [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8)
-  ; CHECK:   [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8)
-  ; CHECK:   [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8)
-  ; CHECK:   [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8)
-  ; CHECK:   [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8)
-  ; CHECK:   [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
-  ; CHECK:   $vgpr0 = COPY [[ANYEXT8]](s32)
-  ; CHECK:   [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16)
-  ; CHECK:   $vgpr1 = COPY [[ANYEXT9]](s32)
-  ; CHECK:   [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16)
-  ; CHECK:   $vgpr2 = COPY [[ANYEXT10]](s32)
-  ; CHECK:   [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16)
-  ; CHECK:   $vgpr3 = COPY [[ANYEXT11]](s32)
-  ; CHECK:   [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16)
-  ; CHECK:   $vgpr4 = COPY [[ANYEXT12]](s32)
-  ; CHECK:   [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16)
-  ; CHECK:   $vgpr5 = COPY [[ANYEXT13]](s32)
-  ; CHECK:   [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16)
-  ; CHECK:   $vgpr6 = COPY [[ANYEXT14]](s32)
-  ; CHECK:   [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16)
-  ; CHECK:   $vgpr7 = COPY [[ANYEXT15]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v8i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<8 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s8>) from %ir.ptr, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v8i8
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<8 x s8>)
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
+  ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8)
+  ; CHECK-NEXT:   [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8)
+  ; CHECK-NEXT:   [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8)
+  ; CHECK-NEXT:   [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8)
+  ; CHECK-NEXT:   [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8)
+  ; CHECK-NEXT:   [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8)
+  ; CHECK-NEXT:   [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8)
+  ; CHECK-NEXT:   [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT8]](s32)
+  ; CHECK-NEXT:   [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[ANYEXT9]](s32)
+  ; CHECK-NEXT:   [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[ANYEXT10]](s32)
+  ; CHECK-NEXT:   [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[ANYEXT11]](s32)
+  ; CHECK-NEXT:   [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[ANYEXT12]](s32)
+  ; CHECK-NEXT:   [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16)
+  ; CHECK-NEXT:   $vgpr5 = COPY [[ANYEXT13]](s32)
+  ; CHECK-NEXT:   [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16)
+  ; CHECK-NEXT:   $vgpr6 = COPY [[ANYEXT14]](s32)
+  ; CHECK-NEXT:   [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16)
+  ; CHECK-NEXT:   $vgpr7 = COPY [[ANYEXT15]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v8i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %ptr = load <8 x i8> addrspace(1)*, <8 x i8> addrspace(1)* addrspace(4)* undef
   %val = load <8 x i8>, <8 x i8> addrspace(1)* %ptr
   call void @external_void_func_v8i8(<8 x i8> %val)
@@ -4327,102 +4397,103 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 {
 define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
   ; CHECK-LABEL: name: test_call_external_void_func_v16i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v16i8
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>)
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
-  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8)
-  ; CHECK:   [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8)
-  ; CHECK:   [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8)
-  ; CHECK:   [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8)
-  ; CHECK:   [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8)
-  ; CHECK:   [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8)
-  ; CHECK:   [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8)
-  ; CHECK:   [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8)
-  ; CHECK:   [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8)
-  ; CHECK:   [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8)
-  ; CHECK:   [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8)
-  ; CHECK:   [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8)
-  ; CHECK:   [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8)
-  ; CHECK:   [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8)
-  ; CHECK:   [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8)
-  ; CHECK:   [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
-  ; CHECK:   $vgpr0 = COPY [[ANYEXT16]](s32)
-  ; CHECK:   [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16)
-  ; CHECK:   $vgpr1 = COPY [[ANYEXT17]](s32)
-  ; CHECK:   [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16)
-  ; CHECK:   $vgpr2 = COPY [[ANYEXT18]](s32)
-  ; CHECK:   [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16)
-  ; CHECK:   $vgpr3 = COPY [[ANYEXT19]](s32)
-  ; CHECK:   [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16)
-  ; CHECK:   $vgpr4 = COPY [[ANYEXT20]](s32)
-  ; CHECK:   [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16)
-  ; CHECK:   $vgpr5 = COPY [[ANYEXT21]](s32)
-  ; CHECK:   [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16)
-  ; CHECK:   $vgpr6 = COPY [[ANYEXT22]](s32)
-  ; CHECK:   [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16)
-  ; CHECK:   $vgpr7 = COPY [[ANYEXT23]](s32)
-  ; CHECK:   [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT8]](s16)
-  ; CHECK:   $vgpr8 = COPY [[ANYEXT24]](s32)
-  ; CHECK:   [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT9]](s16)
-  ; CHECK:   $vgpr9 = COPY [[ANYEXT25]](s32)
-  ; CHECK:   [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT10]](s16)
-  ; CHECK:   $vgpr10 = COPY [[ANYEXT26]](s32)
-  ; CHECK:   [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT11]](s16)
-  ; CHECK:   $vgpr11 = COPY [[ANYEXT27]](s32)
-  ; CHECK:   [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT12]](s16)
-  ; CHECK:   $vgpr12 = COPY [[ANYEXT28]](s32)
-  ; CHECK:   [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT13]](s16)
-  ; CHECK:   $vgpr13 = COPY [[ANYEXT29]](s32)
-  ; CHECK:   [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT14]](s16)
-  ; CHECK:   $vgpr14 = COPY [[ANYEXT30]](s32)
-  ; CHECK:   [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16)
-  ; CHECK:   $vgpr15 = COPY [[ANYEXT31]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v16i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v16i8
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>)
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
+  ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8)
+  ; CHECK-NEXT:   [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8)
+  ; CHECK-NEXT:   [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8)
+  ; CHECK-NEXT:   [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8)
+  ; CHECK-NEXT:   [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8)
+  ; CHECK-NEXT:   [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8)
+  ; CHECK-NEXT:   [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8)
+  ; CHECK-NEXT:   [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8)
+  ; CHECK-NEXT:   [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8)
+  ; CHECK-NEXT:   [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8)
+  ; CHECK-NEXT:   [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8)
+  ; CHECK-NEXT:   [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8)
+  ; CHECK-NEXT:   [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8)
+  ; CHECK-NEXT:   [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8)
+  ; CHECK-NEXT:   [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8)
+  ; CHECK-NEXT:   [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT16]](s32)
+  ; CHECK-NEXT:   [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[ANYEXT17]](s32)
+  ; CHECK-NEXT:   [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[ANYEXT18]](s32)
+  ; CHECK-NEXT:   [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[ANYEXT19]](s32)
+  ; CHECK-NEXT:   [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[ANYEXT20]](s32)
+  ; CHECK-NEXT:   [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16)
+  ; CHECK-NEXT:   $vgpr5 = COPY [[ANYEXT21]](s32)
+  ; CHECK-NEXT:   [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16)
+  ; CHECK-NEXT:   $vgpr6 = COPY [[ANYEXT22]](s32)
+  ; CHECK-NEXT:   [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16)
+  ; CHECK-NEXT:   $vgpr7 = COPY [[ANYEXT23]](s32)
+  ; CHECK-NEXT:   [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT8]](s16)
+  ; CHECK-NEXT:   $vgpr8 = COPY [[ANYEXT24]](s32)
+  ; CHECK-NEXT:   [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT9]](s16)
+  ; CHECK-NEXT:   $vgpr9 = COPY [[ANYEXT25]](s32)
+  ; CHECK-NEXT:   [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT10]](s16)
+  ; CHECK-NEXT:   $vgpr10 = COPY [[ANYEXT26]](s32)
+  ; CHECK-NEXT:   [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT11]](s16)
+  ; CHECK-NEXT:   $vgpr11 = COPY [[ANYEXT27]](s32)
+  ; CHECK-NEXT:   [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT12]](s16)
+  ; CHECK-NEXT:   $vgpr12 = COPY [[ANYEXT28]](s32)
+  ; CHECK-NEXT:   [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT13]](s16)
+  ; CHECK-NEXT:   $vgpr13 = COPY [[ANYEXT29]](s32)
+  ; CHECK-NEXT:   [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT14]](s16)
+  ; CHECK-NEXT:   $vgpr14 = COPY [[ANYEXT30]](s32)
+  ; CHECK-NEXT:   [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16)
+  ; CHECK-NEXT:   $vgpr15 = COPY [[ANYEXT31]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v16i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
   %ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
   %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
   call void @external_void_func_v16i8(<16 x i8> %val)
@@ -4432,98 +4503,99 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
 define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
   ; CHECK-LABEL: name: stack_passed_arg_alignment_v32i32_f64
   ; CHECK: bb.1.entry:
-  ; CHECK:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.val.kernarg.offset.cast, align 16, addrspace 4)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C]](s64)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64) from %ir.tmp.kernarg.offset.cast, align 16, addrspace 4)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @stack_passed_f64_arg
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<32 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   $vgpr4 = COPY [[UV4]](s32)
-  ; CHECK:   $vgpr5 = COPY [[UV5]](s32)
-  ; CHECK:   $vgpr6 = COPY [[UV6]](s32)
-  ; CHECK:   $vgpr7 = COPY [[UV7]](s32)
-  ; CHECK:   $vgpr8 = COPY [[UV8]](s32)
-  ; CHECK:   $vgpr9 = COPY [[UV9]](s32)
-  ; CHECK:   $vgpr10 = COPY [[UV10]](s32)
-  ; CHECK:   $vgpr11 = COPY [[UV11]](s32)
-  ; CHECK:   $vgpr12 = COPY [[UV12]](s32)
-  ; CHECK:   $vgpr13 = COPY [[UV13]](s32)
-  ; CHECK:   $vgpr14 = COPY [[UV14]](s32)
-  ; CHECK:   $vgpr15 = COPY [[UV15]](s32)
-  ; CHECK:   $vgpr16 = COPY [[UV16]](s32)
-  ; CHECK:   $vgpr17 = COPY [[UV17]](s32)
-  ; CHECK:   $vgpr18 = COPY [[UV18]](s32)
-  ; CHECK:   $vgpr19 = COPY [[UV19]](s32)
-  ; CHECK:   $vgpr20 = COPY [[UV20]](s32)
-  ; CHECK:   $vgpr21 = COPY [[UV21]](s32)
-  ; CHECK:   $vgpr22 = COPY [[UV22]](s32)
-  ; CHECK:   $vgpr23 = COPY [[UV23]](s32)
-  ; CHECK:   $vgpr24 = COPY [[UV24]](s32)
-  ; CHECK:   $vgpr25 = COPY [[UV25]](s32)
-  ; CHECK:   $vgpr26 = COPY [[UV26]](s32)
-  ; CHECK:   $vgpr27 = COPY [[UV27]](s32)
-  ; CHECK:   $vgpr28 = COPY [[UV28]](s32)
-  ; CHECK:   $vgpr29 = COPY [[UV29]](s32)
-  ; CHECK:   $vgpr30 = COPY [[UV30]](s32)
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
-  ; CHECK:   G_STORE [[UV31]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack, align 16, addrspace 5)
-  ; CHECK:   [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](s64)
-  ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C5]](s32)
-  ; CHECK:   G_STORE [[UV32]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 4, addrspace 5)
-  ; CHECK:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; CHECK:   [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C6]](s32)
-  ; CHECK:   G_STORE [[UV33]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @stack_passed_f64_arg, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 12, implicit-def $scc
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.val.kernarg.offset.cast, align 16, addrspace 4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C]](s64)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64) from %ir.tmp.kernarg.offset.cast, align 16, addrspace 4)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @stack_passed_f64_arg
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C1]](s64)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C2]](s32)
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
+  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<32 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
+  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
+  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
+  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
+  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](s32)
+  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](s32)
+  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](s32)
+  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](s32)
+  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](s32)
+  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](s32)
+  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](s32)
+  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](s32)
+  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](s32)
+  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](s32)
+  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](s32)
+  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](s32)
+  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](s32)
+  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](s32)
+  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](s32)
+  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](s32)
+  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](s32)
+  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](s32)
+  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](s32)
+  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](s32)
+  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
+  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
+  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](s64)
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C5]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV32]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 4, addrspace 5)
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; CHECK-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C6]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV33]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @stack_passed_f64_arg, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 12, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
 entry:
   call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
   ret void
@@ -4532,127 +4604,128 @@ entry:
 define void @stack_12xv3i32() #0 {
   ; CHECK-LABEL: name: stack_12xv3i32
   ; CHECK: bb.1.entry:
-  ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-  ; CHECK:   [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32), [[C2]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-  ; CHECK:   [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[BUILD_VECTOR4:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32)
-  ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-  ; CHECK:   [[BUILD_VECTOR5:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32), [[C5]](s32)
-  ; CHECK:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-  ; CHECK:   [[BUILD_VECTOR6:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C6]](s32), [[C6]](s32), [[C6]](s32)
-  ; CHECK:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-  ; CHECK:   [[BUILD_VECTOR7:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C7]](s32), [[C7]](s32), [[C7]](s32)
-  ; CHECK:   [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; CHECK:   [[BUILD_VECTOR8:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C8]](s32), [[C8]](s32), [[C8]](s32)
-  ; CHECK:   [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
-  ; CHECK:   [[BUILD_VECTOR9:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C9]](s32), [[C9]](s32), [[C9]](s32)
-  ; CHECK:   [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
-  ; CHECK:   [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-  ; CHECK:   [[BUILD_VECTOR10:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C10]](s32), [[C11]](s32), [[C12]](s32)
-  ; CHECK:   [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
-  ; CHECK:   [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
-  ; CHECK:   [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
-  ; CHECK:   [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_12xv3i32
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   $vgpr4 = COPY [[UV4]](s32)
-  ; CHECK:   $vgpr5 = COPY [[UV5]](s32)
-  ; CHECK:   [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>)
-  ; CHECK:   $vgpr6 = COPY [[UV6]](s32)
-  ; CHECK:   $vgpr7 = COPY [[UV7]](s32)
-  ; CHECK:   $vgpr8 = COPY [[UV8]](s32)
-  ; CHECK:   [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<3 x s32>)
-  ; CHECK:   $vgpr9 = COPY [[UV9]](s32)
-  ; CHECK:   $vgpr10 = COPY [[UV10]](s32)
-  ; CHECK:   $vgpr11 = COPY [[UV11]](s32)
-  ; CHECK:   [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<3 x s32>)
-  ; CHECK:   $vgpr12 = COPY [[UV12]](s32)
-  ; CHECK:   $vgpr13 = COPY [[UV13]](s32)
-  ; CHECK:   $vgpr14 = COPY [[UV14]](s32)
-  ; CHECK:   [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<3 x s32>)
-  ; CHECK:   $vgpr15 = COPY [[UV15]](s32)
-  ; CHECK:   $vgpr16 = COPY [[UV16]](s32)
-  ; CHECK:   $vgpr17 = COPY [[UV17]](s32)
-  ; CHECK:   [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<3 x s32>)
-  ; CHECK:   $vgpr18 = COPY [[UV18]](s32)
-  ; CHECK:   $vgpr19 = COPY [[UV19]](s32)
-  ; CHECK:   $vgpr20 = COPY [[UV20]](s32)
-  ; CHECK:   [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<3 x s32>)
-  ; CHECK:   $vgpr21 = COPY [[UV21]](s32)
-  ; CHECK:   $vgpr22 = COPY [[UV22]](s32)
-  ; CHECK:   $vgpr23 = COPY [[UV23]](s32)
-  ; CHECK:   [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>)
-  ; CHECK:   $vgpr24 = COPY [[UV24]](s32)
-  ; CHECK:   $vgpr25 = COPY [[UV25]](s32)
-  ; CHECK:   $vgpr26 = COPY [[UV26]](s32)
-  ; CHECK:   [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>)
-  ; CHECK:   $vgpr27 = COPY [[UV27]](s32)
-  ; CHECK:   $vgpr28 = COPY [[UV28]](s32)
-  ; CHECK:   $vgpr29 = COPY [[UV29]](s32)
-  ; CHECK:   [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>)
-  ; CHECK:   $vgpr30 = COPY [[UV30]](s32)
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32
-  ; CHECK:   [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32)
-  ; CHECK:   G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
-  ; CHECK:   [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32)
-  ; CHECK:   G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
-  ; CHECK:   [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>)
-  ; CHECK:   [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; CHECK:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32)
-  ; CHECK:   G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
-  ; CHECK:   [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-  ; CHECK:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32)
-  ; CHECK:   G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5)
-  ; CHECK:   [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; CHECK:   [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32)
-  ; CHECK:   G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY13]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY15]](s32)
-  ; CHECK:   $vgpr31 = COPY [[COPY16]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_12xv3i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 20, implicit-def $scc
-  ; CHECK:   [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
-  ; CHECK:   S_SETPC_B64_return [[COPY19]]
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+  ; CHECK-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32), [[C2]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+  ; CHECK-NEXT:   [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[BUILD_VECTOR4:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32)
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+  ; CHECK-NEXT:   [[BUILD_VECTOR5:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32), [[C5]](s32)
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+  ; CHECK-NEXT:   [[BUILD_VECTOR6:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C6]](s32), [[C6]](s32), [[C6]](s32)
+  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+  ; CHECK-NEXT:   [[BUILD_VECTOR7:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C7]](s32), [[C7]](s32), [[C7]](s32)
+  ; CHECK-NEXT:   [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; CHECK-NEXT:   [[BUILD_VECTOR8:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C8]](s32), [[C8]](s32), [[C8]](s32)
+  ; CHECK-NEXT:   [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+  ; CHECK-NEXT:   [[BUILD_VECTOR9:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C9]](s32), [[C9]](s32), [[C9]](s32)
+  ; CHECK-NEXT:   [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+  ; CHECK-NEXT:   [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+  ; CHECK-NEXT:   [[BUILD_VECTOR10:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C10]](s32), [[C11]](s32), [[C12]](s32)
+  ; CHECK-NEXT:   [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+  ; CHECK-NEXT:   [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+  ; CHECK-NEXT:   [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+  ; CHECK-NEXT:   [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_12xv3i32
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
+  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
+  ; CHECK-NEXT:   [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
+  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
+  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](s32)
+  ; CHECK-NEXT:   [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](s32)
+  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](s32)
+  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](s32)
+  ; CHECK-NEXT:   [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](s32)
+  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](s32)
+  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](s32)
+  ; CHECK-NEXT:   [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](s32)
+  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](s32)
+  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](s32)
+  ; CHECK-NEXT:   [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](s32)
+  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](s32)
+  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](s32)
+  ; CHECK-NEXT:   [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](s32)
+  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](s32)
+  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](s32)
+  ; CHECK-NEXT:   [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](s32)
+  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](s32)
+  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](s32)
+  ; CHECK-NEXT:   [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](s32)
+  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
+  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
+  ; CHECK-NEXT:   [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32
+  ; CHECK-NEXT:   [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
+  ; CHECK-NEXT:   [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>)
+  ; CHECK-NEXT:   [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5)
+  ; CHECK-NEXT:   [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; CHECK-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_12xv3i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 20, implicit-def $scc
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY19]]
 entry:
   call void @external_void_func_12xv3i32(
       <3 x i32> <i32 0, i32 0, i32 0>,
@@ -4673,127 +4746,128 @@ entry:
 define void @stack_12xv3f32() #0 {
   ; CHECK-LABEL: name: stack_12xv3f32
   ; CHECK: bb.1.entry:
-  ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
-  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
-  ; CHECK:   [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32), [[C2]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.000000e+00
-  ; CHECK:   [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
-  ; CHECK:   [[BUILD_VECTOR4:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32)
-  ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e+00
-  ; CHECK:   [[BUILD_VECTOR5:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32), [[C5]](s32)
-  ; CHECK:   [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.000000e+00
-  ; CHECK:   [[BUILD_VECTOR6:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C6]](s32), [[C6]](s32), [[C6]](s32)
-  ; CHECK:   [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 7.000000e+00
-  ; CHECK:   [[BUILD_VECTOR7:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C7]](s32), [[C7]](s32), [[C7]](s32)
-  ; CHECK:   [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 8.000000e+00
-  ; CHECK:   [[BUILD_VECTOR8:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C8]](s32), [[C8]](s32), [[C8]](s32)
-  ; CHECK:   [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 9.000000e+00
-  ; CHECK:   [[BUILD_VECTOR9:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C9]](s32), [[C9]](s32), [[C9]](s32)
-  ; CHECK:   [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+01
-  ; CHECK:   [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.100000e+01
-  ; CHECK:   [[C12:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.200000e+01
-  ; CHECK:   [[BUILD_VECTOR10:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C10]](s32), [[C11]](s32), [[C12]](s32)
-  ; CHECK:   [[C13:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.300000e+01
-  ; CHECK:   [[C14:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.400000e+01
-  ; CHECK:   [[C15:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.500000e+01
-  ; CHECK:   [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_12xv3f32
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   $vgpr4 = COPY [[UV4]](s32)
-  ; CHECK:   $vgpr5 = COPY [[UV5]](s32)
-  ; CHECK:   [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>)
-  ; CHECK:   $vgpr6 = COPY [[UV6]](s32)
-  ; CHECK:   $vgpr7 = COPY [[UV7]](s32)
-  ; CHECK:   $vgpr8 = COPY [[UV8]](s32)
-  ; CHECK:   [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<3 x s32>)
-  ; CHECK:   $vgpr9 = COPY [[UV9]](s32)
-  ; CHECK:   $vgpr10 = COPY [[UV10]](s32)
-  ; CHECK:   $vgpr11 = COPY [[UV11]](s32)
-  ; CHECK:   [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<3 x s32>)
-  ; CHECK:   $vgpr12 = COPY [[UV12]](s32)
-  ; CHECK:   $vgpr13 = COPY [[UV13]](s32)
-  ; CHECK:   $vgpr14 = COPY [[UV14]](s32)
-  ; CHECK:   [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<3 x s32>)
-  ; CHECK:   $vgpr15 = COPY [[UV15]](s32)
-  ; CHECK:   $vgpr16 = COPY [[UV16]](s32)
-  ; CHECK:   $vgpr17 = COPY [[UV17]](s32)
-  ; CHECK:   [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<3 x s32>)
-  ; CHECK:   $vgpr18 = COPY [[UV18]](s32)
-  ; CHECK:   $vgpr19 = COPY [[UV19]](s32)
-  ; CHECK:   $vgpr20 = COPY [[UV20]](s32)
-  ; CHECK:   [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<3 x s32>)
-  ; CHECK:   $vgpr21 = COPY [[UV21]](s32)
-  ; CHECK:   $vgpr22 = COPY [[UV22]](s32)
-  ; CHECK:   $vgpr23 = COPY [[UV23]](s32)
-  ; CHECK:   [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>)
-  ; CHECK:   $vgpr24 = COPY [[UV24]](s32)
-  ; CHECK:   $vgpr25 = COPY [[UV25]](s32)
-  ; CHECK:   $vgpr26 = COPY [[UV26]](s32)
-  ; CHECK:   [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>)
-  ; CHECK:   $vgpr27 = COPY [[UV27]](s32)
-  ; CHECK:   $vgpr28 = COPY [[UV28]](s32)
-  ; CHECK:   $vgpr29 = COPY [[UV29]](s32)
-  ; CHECK:   [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>)
-  ; CHECK:   $vgpr30 = COPY [[UV30]](s32)
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32
-  ; CHECK:   [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32)
-  ; CHECK:   G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
-  ; CHECK:   [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32)
-  ; CHECK:   G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
-  ; CHECK:   [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>)
-  ; CHECK:   [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; CHECK:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32)
-  ; CHECK:   G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
-  ; CHECK:   [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-  ; CHECK:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32)
-  ; CHECK:   G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5)
-  ; CHECK:   [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; CHECK:   [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32)
-  ; CHECK:   G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY13]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY15]](s32)
-  ; CHECK:   $vgpr31 = COPY [[COPY16]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_12xv3f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 20, implicit-def $scc
-  ; CHECK:   [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
-  ; CHECK:   S_SETPC_B64_return [[COPY19]]
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32), [[C2]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR4:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32)
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR5:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32), [[C5]](s32)
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR6:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C6]](s32), [[C6]](s32), [[C6]](s32)
+  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 7.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR7:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C7]](s32), [[C7]](s32), [[C7]](s32)
+  ; CHECK-NEXT:   [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 8.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR8:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C8]](s32), [[C8]](s32), [[C8]](s32)
+  ; CHECK-NEXT:   [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 9.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR9:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C9]](s32), [[C9]](s32), [[C9]](s32)
+  ; CHECK-NEXT:   [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+01
+  ; CHECK-NEXT:   [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.100000e+01
+  ; CHECK-NEXT:   [[C12:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.200000e+01
+  ; CHECK-NEXT:   [[BUILD_VECTOR10:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C10]](s32), [[C11]](s32), [[C12]](s32)
+  ; CHECK-NEXT:   [[C13:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.300000e+01
+  ; CHECK-NEXT:   [[C14:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.400000e+01
+  ; CHECK-NEXT:   [[C15:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.500000e+01
+  ; CHECK-NEXT:   [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_12xv3f32
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
+  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
+  ; CHECK-NEXT:   [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
+  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
+  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](s32)
+  ; CHECK-NEXT:   [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](s32)
+  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](s32)
+  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](s32)
+  ; CHECK-NEXT:   [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](s32)
+  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](s32)
+  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](s32)
+  ; CHECK-NEXT:   [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](s32)
+  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](s32)
+  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](s32)
+  ; CHECK-NEXT:   [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](s32)
+  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](s32)
+  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](s32)
+  ; CHECK-NEXT:   [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](s32)
+  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](s32)
+  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](s32)
+  ; CHECK-NEXT:   [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](s32)
+  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](s32)
+  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](s32)
+  ; CHECK-NEXT:   [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](s32)
+  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
+  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
+  ; CHECK-NEXT:   [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>)
+  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32
+  ; CHECK-NEXT:   [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
+  ; CHECK-NEXT:   [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>)
+  ; CHECK-NEXT:   [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5)
+  ; CHECK-NEXT:   [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; CHECK-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_12xv3f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 20, implicit-def $scc
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY19]]
 entry:
   call void @external_void_func_12xv3f32(
       <3 x float> <float 0.0, float 0.0, float 0.0>,
@@ -4814,131 +4888,132 @@ entry:
 define void @stack_8xv5i32() #0 {
   ; CHECK-LABEL: name: stack_8xv5i32
   ; CHECK: bb.1.entry:
-  ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-  ; CHECK:   [[BUILD_VECTOR2:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32), [[C2]](s32), [[C2]](s32), [[C2]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-  ; CHECK:   [[BUILD_VECTOR3:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[BUILD_VECTOR4:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32)
-  ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-  ; CHECK:   [[BUILD_VECTOR5:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32), [[C5]](s32), [[C5]](s32), [[C5]](s32)
-  ; CHECK:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-  ; CHECK:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
-  ; CHECK:   [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; CHECK:   [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
-  ; CHECK:   [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[BUILD_VECTOR6:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C6]](s32), [[C7]](s32), [[C8]](s32), [[C9]](s32), [[C10]](s32)
-  ; CHECK:   [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
-  ; CHECK:   [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-  ; CHECK:   [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
-  ; CHECK:   [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
-  ; CHECK:   [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
-  ; CHECK:   [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_8xv5i32
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   $vgpr4 = COPY [[UV4]](s32)
-  ; CHECK:   [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>)
-  ; CHECK:   $vgpr5 = COPY [[UV5]](s32)
-  ; CHECK:   $vgpr6 = COPY [[UV6]](s32)
-  ; CHECK:   $vgpr7 = COPY [[UV7]](s32)
-  ; CHECK:   $vgpr8 = COPY [[UV8]](s32)
-  ; CHECK:   $vgpr9 = COPY [[UV9]](s32)
-  ; CHECK:   [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>)
-  ; CHECK:   $vgpr10 = COPY [[UV10]](s32)
-  ; CHECK:   $vgpr11 = COPY [[UV11]](s32)
-  ; CHECK:   $vgpr12 = COPY [[UV12]](s32)
-  ; CHECK:   $vgpr13 = COPY [[UV13]](s32)
-  ; CHECK:   $vgpr14 = COPY [[UV14]](s32)
-  ; CHECK:   [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<5 x s32>)
-  ; CHECK:   $vgpr15 = COPY [[UV15]](s32)
-  ; CHECK:   $vgpr16 = COPY [[UV16]](s32)
-  ; CHECK:   $vgpr17 = COPY [[UV17]](s32)
-  ; CHECK:   $vgpr18 = COPY [[UV18]](s32)
-  ; CHECK:   $vgpr19 = COPY [[UV19]](s32)
-  ; CHECK:   [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>)
-  ; CHECK:   $vgpr20 = COPY [[UV20]](s32)
-  ; CHECK:   $vgpr21 = COPY [[UV21]](s32)
-  ; CHECK:   $vgpr22 = COPY [[UV22]](s32)
-  ; CHECK:   $vgpr23 = COPY [[UV23]](s32)
-  ; CHECK:   $vgpr24 = COPY [[UV24]](s32)
-  ; CHECK:   [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>)
-  ; CHECK:   $vgpr25 = COPY [[UV25]](s32)
-  ; CHECK:   $vgpr26 = COPY [[UV26]](s32)
-  ; CHECK:   $vgpr27 = COPY [[UV27]](s32)
-  ; CHECK:   $vgpr28 = COPY [[UV28]](s32)
-  ; CHECK:   $vgpr29 = COPY [[UV29]](s32)
-  ; CHECK:   [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>)
-  ; CHECK:   $vgpr30 = COPY [[UV30]](s32)
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32
-  ; CHECK:   [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32)
-  ; CHECK:   G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
-  ; CHECK:   [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32)
-  ; CHECK:   G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
-  ; CHECK:   [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; CHECK:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32)
-  ; CHECK:   G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
-  ; CHECK:   [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-  ; CHECK:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32)
-  ; CHECK:   G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5)
-  ; CHECK:   [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>)
-  ; CHECK:   [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; CHECK:   [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32)
-  ; CHECK:   G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5)
-  ; CHECK:   [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C21]](s32)
-  ; CHECK:   G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store (s32) into stack + 20, addrspace 5)
-  ; CHECK:   [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-  ; CHECK:   [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C22]](s32)
-  ; CHECK:   G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store (s32) into stack + 24, align 8, addrspace 5)
-  ; CHECK:   [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-  ; CHECK:   [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C23]](s32)
-  ; CHECK:   G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store (s32) into stack + 28, addrspace 5)
-  ; CHECK:   [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-  ; CHECK:   [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C24]](s32)
-  ; CHECK:   G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY13]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY15]](s32)
-  ; CHECK:   $vgpr31 = COPY [[COPY16]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_8xv5i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 36, implicit-def $scc
-  ; CHECK:   [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
-  ; CHECK:   S_SETPC_B64_return [[COPY19]]
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+  ; CHECK-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32), [[C2]](s32), [[C2]](s32), [[C2]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+  ; CHECK-NEXT:   [[BUILD_VECTOR3:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[BUILD_VECTOR4:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32)
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+  ; CHECK-NEXT:   [[BUILD_VECTOR5:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32), [[C5]](s32), [[C5]](s32), [[C5]](s32)
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+  ; CHECK-NEXT:   [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; CHECK-NEXT:   [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+  ; CHECK-NEXT:   [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+  ; CHECK-NEXT:   [[BUILD_VECTOR6:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C6]](s32), [[C7]](s32), [[C8]](s32), [[C9]](s32), [[C10]](s32)
+  ; CHECK-NEXT:   [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+  ; CHECK-NEXT:   [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+  ; CHECK-NEXT:   [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+  ; CHECK-NEXT:   [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+  ; CHECK-NEXT:   [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+  ; CHECK-NEXT:   [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_8xv5i32
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
+  ; CHECK-NEXT:   [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>)
+  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
+  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
+  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
+  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](s32)
+  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](s32)
+  ; CHECK-NEXT:   [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>)
+  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](s32)
+  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](s32)
+  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](s32)
+  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](s32)
+  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](s32)
+  ; CHECK-NEXT:   [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<5 x s32>)
+  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](s32)
+  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](s32)
+  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](s32)
+  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](s32)
+  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](s32)
+  ; CHECK-NEXT:   [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>)
+  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](s32)
+  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](s32)
+  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](s32)
+  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](s32)
+  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](s32)
+  ; CHECK-NEXT:   [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>)
+  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](s32)
+  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](s32)
+  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](s32)
+  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
+  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
+  ; CHECK-NEXT:   [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>)
+  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32
+  ; CHECK-NEXT:   [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
+  ; CHECK-NEXT:   [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5)
+  ; CHECK-NEXT:   [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>)
+  ; CHECK-NEXT:   [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; CHECK-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C21]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store (s32) into stack + 20, addrspace 5)
+  ; CHECK-NEXT:   [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+  ; CHECK-NEXT:   [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C22]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store (s32) into stack + 24, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+  ; CHECK-NEXT:   [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C23]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store (s32) into stack + 28, addrspace 5)
+  ; CHECK-NEXT:   [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+  ; CHECK-NEXT:   [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C24]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_8xv5i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 36, implicit-def $scc
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY19]]
 entry:
   call void @external_void_func_8xv5i32(
       <5 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0>,
@@ -4955,131 +5030,132 @@ entry:
 define void @stack_8xv5f32() #0 {
   ; CHECK-LABEL: name: stack_8xv5f32
   ; CHECK: bb.1.entry:
-  ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
-  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
-  ; CHECK:   [[BUILD_VECTOR2:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32), [[C2]](s32), [[C2]](s32), [[C2]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.000000e+00
-  ; CHECK:   [[BUILD_VECTOR3:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
-  ; CHECK:   [[BUILD_VECTOR4:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32)
-  ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e+00
-  ; CHECK:   [[BUILD_VECTOR5:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32), [[C5]](s32), [[C5]](s32), [[C5]](s32)
-  ; CHECK:   [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.000000e+00
-  ; CHECK:   [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 7.000000e+00
-  ; CHECK:   [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 8.000000e+00
-  ; CHECK:   [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 9.000000e+00
-  ; CHECK:   [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+01
-  ; CHECK:   [[BUILD_VECTOR6:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C6]](s32), [[C7]](s32), [[C8]](s32), [[C9]](s32), [[C10]](s32)
-  ; CHECK:   [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.100000e+01
-  ; CHECK:   [[C12:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.200000e+01
-  ; CHECK:   [[C13:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.300000e+01
-  ; CHECK:   [[C14:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.400000e+01
-  ; CHECK:   [[C15:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.500000e+01
-  ; CHECK:   [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32)
-  ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_8xv5f32
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   $vgpr2 = COPY [[UV2]](s32)
-  ; CHECK:   $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK:   $vgpr4 = COPY [[UV4]](s32)
-  ; CHECK:   [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>)
-  ; CHECK:   $vgpr5 = COPY [[UV5]](s32)
-  ; CHECK:   $vgpr6 = COPY [[UV6]](s32)
-  ; CHECK:   $vgpr7 = COPY [[UV7]](s32)
-  ; CHECK:   $vgpr8 = COPY [[UV8]](s32)
-  ; CHECK:   $vgpr9 = COPY [[UV9]](s32)
-  ; CHECK:   [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>)
-  ; CHECK:   $vgpr10 = COPY [[UV10]](s32)
-  ; CHECK:   $vgpr11 = COPY [[UV11]](s32)
-  ; CHECK:   $vgpr12 = COPY [[UV12]](s32)
-  ; CHECK:   $vgpr13 = COPY [[UV13]](s32)
-  ; CHECK:   $vgpr14 = COPY [[UV14]](s32)
-  ; CHECK:   [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<5 x s32>)
-  ; CHECK:   $vgpr15 = COPY [[UV15]](s32)
-  ; CHECK:   $vgpr16 = COPY [[UV16]](s32)
-  ; CHECK:   $vgpr17 = COPY [[UV17]](s32)
-  ; CHECK:   $vgpr18 = COPY [[UV18]](s32)
-  ; CHECK:   $vgpr19 = COPY [[UV19]](s32)
-  ; CHECK:   [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>)
-  ; CHECK:   $vgpr20 = COPY [[UV20]](s32)
-  ; CHECK:   $vgpr21 = COPY [[UV21]](s32)
-  ; CHECK:   $vgpr22 = COPY [[UV22]](s32)
-  ; CHECK:   $vgpr23 = COPY [[UV23]](s32)
-  ; CHECK:   $vgpr24 = COPY [[UV24]](s32)
-  ; CHECK:   [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>)
-  ; CHECK:   $vgpr25 = COPY [[UV25]](s32)
-  ; CHECK:   $vgpr26 = COPY [[UV26]](s32)
-  ; CHECK:   $vgpr27 = COPY [[UV27]](s32)
-  ; CHECK:   $vgpr28 = COPY [[UV28]](s32)
-  ; CHECK:   $vgpr29 = COPY [[UV29]](s32)
-  ; CHECK:   [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>)
-  ; CHECK:   $vgpr30 = COPY [[UV30]](s32)
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32
-  ; CHECK:   [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32)
-  ; CHECK:   G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
-  ; CHECK:   [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32)
-  ; CHECK:   G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
-  ; CHECK:   [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; CHECK:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32)
-  ; CHECK:   G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
-  ; CHECK:   [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-  ; CHECK:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32)
-  ; CHECK:   G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5)
-  ; CHECK:   [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>)
-  ; CHECK:   [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; CHECK:   [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32)
-  ; CHECK:   G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5)
-  ; CHECK:   [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C21]](s32)
-  ; CHECK:   G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store (s32) into stack + 20, addrspace 5)
-  ; CHECK:   [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-  ; CHECK:   [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C22]](s32)
-  ; CHECK:   G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store (s32) into stack + 24, align 8, addrspace 5)
-  ; CHECK:   [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-  ; CHECK:   [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C23]](s32)
-  ; CHECK:   G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store (s32) into stack + 28, addrspace 5)
-  ; CHECK:   [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-  ; CHECK:   [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C24]](s32)
-  ; CHECK:   G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5)
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY13]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY15]](s32)
-  ; CHECK:   $vgpr31 = COPY [[COPY16]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_8xv5f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; CHECK:   ADJCALLSTACKDOWN 0, 36, implicit-def $scc
-  ; CHECK:   [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
-  ; CHECK:   S_SETPC_B64_return [[COPY19]]
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32), [[C2]](s32), [[C2]](s32), [[C2]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR3:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR4:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32)
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR5:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32), [[C5]](s32), [[C5]](s32), [[C5]](s32)
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.000000e+00
+  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 7.000000e+00
+  ; CHECK-NEXT:   [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 8.000000e+00
+  ; CHECK-NEXT:   [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 9.000000e+00
+  ; CHECK-NEXT:   [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+01
+  ; CHECK-NEXT:   [[BUILD_VECTOR6:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C6]](s32), [[C7]](s32), [[C8]](s32), [[C9]](s32), [[C10]](s32)
+  ; CHECK-NEXT:   [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.100000e+01
+  ; CHECK-NEXT:   [[C12:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.200000e+01
+  ; CHECK-NEXT:   [[C13:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.300000e+01
+  ; CHECK-NEXT:   [[C14:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.400000e+01
+  ; CHECK-NEXT:   [[C15:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.500000e+01
+  ; CHECK-NEXT:   [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32)
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_8xv5f32
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
+  ; CHECK-NEXT:   [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>)
+  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
+  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
+  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
+  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](s32)
+  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](s32)
+  ; CHECK-NEXT:   [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>)
+  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](s32)
+  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](s32)
+  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](s32)
+  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](s32)
+  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](s32)
+  ; CHECK-NEXT:   [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<5 x s32>)
+  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](s32)
+  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](s32)
+  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](s32)
+  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](s32)
+  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](s32)
+  ; CHECK-NEXT:   [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>)
+  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](s32)
+  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](s32)
+  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](s32)
+  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](s32)
+  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](s32)
+  ; CHECK-NEXT:   [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>)
+  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](s32)
+  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](s32)
+  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](s32)
+  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
+  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
+  ; CHECK-NEXT:   [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>)
+  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32
+  ; CHECK-NEXT:   [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
+  ; CHECK-NEXT:   [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5)
+  ; CHECK-NEXT:   [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>)
+  ; CHECK-NEXT:   [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; CHECK-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; CHECK-NEXT:   [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C21]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store (s32) into stack + 20, addrspace 5)
+  ; CHECK-NEXT:   [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+  ; CHECK-NEXT:   [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C22]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store (s32) into stack + 24, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+  ; CHECK-NEXT:   [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C23]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store (s32) into stack + 28, addrspace 5)
+  ; CHECK-NEXT:   [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+  ; CHECK-NEXT:   [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C24]](s32)
+  ; CHECK-NEXT:   G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_8xv5f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 36, implicit-def $scc
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY19]]
 entry:
   call void @external_void_func_8xv5f32(
       <5 x float> <float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>,

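For readers skimming the regenerated checks above: with plain CHECK, FileCheck may silently skip unexpected instructions between two matches, whereas CHECK-NEXT requires each directive to match the line immediately after the previous match. A minimal sketch of the difference, using hypothetical register names that are not part of this patch:

  ; CHECK:      [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]

Under the old plain-CHECK output, an extra instruction emitted between the second COPY and the G_ADD would still pass; under CHECK-NEXT it fails, so regressions that insert or reorder instructions are caught. The `; CHECK-NEXT: {{  $}}` directives in the regenerated blocks appear to match the separator line (two spaces, then end of line) that the MIR printer emits after each liveins list, keeping the NEXT chain unbroken across it.
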
diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
index c5a1eb8b6a10b..7732958191914 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -5,14 +5,15 @@
 define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 {
   ; GCN-LABEL: name: i32_fastcc_i32_i32
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
-  ; GCN:   $vgpr0 = COPY [[ADD]](s32)
-  ; GCN:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; GCN:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
+  ; GCN-NEXT:   $vgpr0 = COPY [[ADD]](s32)
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; GCN-NEXT:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
   %add0 = add i32 %arg0, %arg1
   ret i32 %add0
 }
@@ -20,19 +21,20 @@ define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 {
 define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 {
   ; GCN-LABEL: name: i32_fastcc_i32_i32_stack_object
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
-  ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
-  ; GCN:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32)
-  ; GCN:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
-  ; GCN:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
-  ; GCN:   $vgpr0 = COPY [[ADD]](s32)
-  ; GCN:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; GCN:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+  ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32)
+  ; GCN-NEXT:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
+  ; GCN-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
+  ; GCN-NEXT:   $vgpr0 = COPY [[ADD]](s32)
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; GCN-NEXT:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
   %alloca = alloca [16 x i32], align 4, addrspace(5)
   %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5
   store volatile i32 9, i32 addrspace(5)* %gep
@@ -43,17 +45,18 @@ define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 {
 define hidden fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
-  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
-  ; GCN:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
+  ; GCN-NEXT:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN-NEXT:   $vgpr1 = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+  ; GCN-NEXT:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b)
   ret i32 %ret
@@ -62,22 +65,23 @@ entry:
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, i32 %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_stack_object
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
-  ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
-  ; GCN:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32)
-  ; GCN:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
-  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
-  ; GCN:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+  ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32)
+  ; GCN-NEXT:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
+  ; GCN-NEXT:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN-NEXT:   $vgpr1 = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+  ; GCN-NEXT:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %alloca = alloca [16 x i32], align 4, addrspace(5)
   %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5
@@ -89,22 +93,23 @@ entry:
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i32 %b, i32 %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_callee_stack_object
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
-  ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
-  ; GCN:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32)
-  ; GCN:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_stack_object
-  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
-  ; GCN:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_stack_object, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+  ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32)
+  ; GCN-NEXT:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_stack_object
+  ; GCN-NEXT:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN-NEXT:   $vgpr1 = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+  ; GCN-NEXT:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_stack_object, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %alloca = alloca [16 x i32], align 4, addrspace(5)
   %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5
@@ -116,17 +121,18 @@ entry:
 define fastcc void @sibling_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_unused_result
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
-  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
-  ; GCN:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
+  ; GCN-NEXT:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN-NEXT:   $vgpr1 = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+  ; GCN-NEXT:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b)
   ret void
@@ -136,26 +142,27 @@ entry:
 define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 {
   ; GCN-LABEL: name: kernel_call_i32_fastcc_i32_i32_unused_result
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr8_sgpr9
-  ; GCN:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
-  ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GCN:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-  ; GCN:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; GCN:   [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<2 x s32>) from %ir.0, align 16, addrspace 4)
-  ; GCN:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C]](s32)
-  ; GCN:   [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C1]](s32)
-  ; GCN:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C2]](s64)
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
-  ; GCN:   $vgpr0 = COPY [[EVEC]](s32)
-  ; GCN:   $vgpr1 = COPY [[EVEC1]](s32)
-  ; GCN:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
-  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   S_ENDPGM 0
+  ; GCN-NEXT:   liveins: $sgpr8_sgpr9
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; GCN-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
+  ; GCN-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<2 x s32>) from %ir.0, align 16, addrspace 4)
+  ; GCN-NEXT:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C]](s32)
+  ; GCN-NEXT:   [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C1]](s32)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C2]](s64)
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
+  ; GCN-NEXT:   $vgpr0 = COPY [[EVEC]](s32)
+  ; GCN-NEXT:   $vgpr1 = COPY [[EVEC1]](s32)
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   S_ENDPGM 0
 entry:
   %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b)
   ret void
@@ -164,16 +171,17 @@ entry:
 define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32 addrspace(5)* byval(i32) align 4 %arg1) #1 {
   ; GCN-LABEL: name: i32_fastcc_i32_byval_i32
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; GCN:   [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s32) from %ir.arg1, addrspace 5)
-  ; GCN:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[LOAD]]
-  ; GCN:   $vgpr0 = COPY [[ADD]](s32)
-  ; GCN:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; GCN:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; GCN-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s32) from %ir.arg1, addrspace 5)
+  ; GCN-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[LOAD]]
+  ; GCN-NEXT:   $vgpr0 = COPY [[ADD]](s32)
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; GCN-NEXT:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
   %arg1.load = load i32, i32 addrspace(5)* %arg1, align 4
   %add0 = add i32 %arg0, %arg1.load
   ret i32 %add0
@@ -183,28 +191,29 @@ define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32 addrspace(5)*
 define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, i32 addrspace(5)* byval(i32) %b.byval, i32 %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32_byval_parent
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; GCN:   [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
-  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32
-  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
-  ; GCN:   [[COPY4:%[0-9]+]]:_(p5) = COPY $sgpr32
-  ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY4]], [[C]](s32)
-  ; GCN:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; GCN:   G_MEMCPY [[PTR_ADD]](p5), [[COPY1]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into stack, addrspace 5), (dereferenceable load (s32) from %ir.b.byval, addrspace 5)
-  ; GCN:   [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
-  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   ADJCALLSTACKDOWN 0, 4, implicit-def $scc
-  ; GCN:   $vgpr0 = COPY [[COPY6]](s32)
-  ; GCN:   [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; GCN:   S_SETPC_B64_return [[COPY7]], implicit $vgpr0
+  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32
+  ; GCN-NEXT:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(p5) = COPY $sgpr32
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY4]], [[C]](s32)
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; GCN-NEXT:   G_MEMCPY [[PTR_ADD]](p5), [[COPY1]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into stack, addrspace 5), (dereferenceable load (s32) from %ir.b.byval, addrspace 5)
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 4, implicit-def $scc
+  ; GCN-NEXT:   $vgpr0 = COPY [[COPY6]](s32)
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; GCN-NEXT:   S_SETPC_B64_return [[COPY7]], implicit $vgpr0
 entry:
   %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) %b.byval)
   ret i32 %ret
@@ -216,53 +225,54 @@ entry:
 define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %large) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; GCN:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; GCN:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; GCN:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; GCN:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; GCN:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; GCN:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
-  ; GCN:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; GCN:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
-  ; GCN:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; GCN:   [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[C]](s32)
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32
-  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
-  ; GCN:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; GCN:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; GCN:   G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load (s32) from `i32 addrspace(5)* inttoptr (i32 16 to i32 addrspace(5)*)`, align 16, addrspace 5)
-  ; GCN:   [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_byval_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; GCN-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; GCN-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; GCN-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; GCN-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; GCN-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; GCN-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+  ; GCN-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; GCN-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
+  ; GCN-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; GCN-NEXT:   [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[C]](s32)
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32
+  ; GCN-NEXT:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; GCN-NEXT:   G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load (s32) from `i32 addrspace(5)* inttoptr (i32 16 to i32 addrspace(5)*)`, align 16, addrspace 5)
+  ; GCN-NEXT:   [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+  ; GCN-NEXT:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_byval_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) inttoptr (i32 16 to i32 addrspace(5)*))
   ret i32 %ret
@@ -271,51 +281,52 @@ entry:
 define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %large) #1 {
   ; GCN-LABEL: name: i32_fastcc_i32_i32_a32i32
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; GCN:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; GCN:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; GCN:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; GCN:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; GCN:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; GCN:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
-  ; GCN:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; GCN:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
-  ; GCN:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; GCN:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5)
-  ; GCN:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
-  ; GCN:   [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[LOAD1]]
-  ; GCN:   [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[LOAD2]]
-  ; GCN:   $vgpr0 = COPY [[ADD2]](s32)
-  ; GCN:   [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
-  ; GCN:   S_SETPC_B64_return [[COPY32]], implicit $vgpr0
+  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; GCN-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; GCN-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; GCN-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; GCN-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; GCN-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; GCN-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+  ; GCN-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; GCN-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; GCN-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5)
+  ; GCN-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
+  ; GCN-NEXT:   [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[LOAD1]]
+  ; GCN-NEXT:   [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[LOAD2]]
+  ; GCN-NEXT:   $vgpr0 = COPY [[ADD2]](s32)
+  ; GCN-NEXT:   [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
+  ; GCN-NEXT:   S_SETPC_B64_return [[COPY32]], implicit $vgpr0
   %val_firststack = extractvalue [32 x i32] %large, 30
   %val_laststack = extractvalue [32 x i32] %large, 31
   %add0 = add i32 %arg0, %arg1
@@ -327,86 +338,87 @@ define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %l
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_a32i32
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; GCN:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; GCN:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; GCN:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; GCN:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; GCN:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; GCN:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
-  ; GCN:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
-  ; GCN:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
-  ; GCN:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
-  ; GCN:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
-  ; GCN:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
-  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
-  ; GCN:   $vgpr2 = COPY [[COPY2]](s32)
-  ; GCN:   $vgpr3 = COPY [[COPY3]](s32)
-  ; GCN:   $vgpr4 = COPY [[COPY4]](s32)
-  ; GCN:   $vgpr5 = COPY [[COPY5]](s32)
-  ; GCN:   $vgpr6 = COPY [[COPY6]](s32)
-  ; GCN:   $vgpr7 = COPY [[COPY7]](s32)
-  ; GCN:   $vgpr8 = COPY [[COPY8]](s32)
-  ; GCN:   $vgpr9 = COPY [[COPY9]](s32)
-  ; GCN:   $vgpr10 = COPY [[COPY10]](s32)
-  ; GCN:   $vgpr11 = COPY [[COPY11]](s32)
-  ; GCN:   $vgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $vgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $vgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr15 = COPY [[COPY15]](s32)
-  ; GCN:   $vgpr16 = COPY [[COPY16]](s32)
-  ; GCN:   $vgpr17 = COPY [[COPY17]](s32)
-  ; GCN:   $vgpr18 = COPY [[COPY18]](s32)
-  ; GCN:   $vgpr19 = COPY [[COPY19]](s32)
-  ; GCN:   $vgpr20 = COPY [[COPY20]](s32)
-  ; GCN:   $vgpr21 = COPY [[COPY21]](s32)
-  ; GCN:   $vgpr22 = COPY [[COPY22]](s32)
-  ; GCN:   $vgpr23 = COPY [[COPY23]](s32)
-  ; GCN:   $vgpr24 = COPY [[COPY24]](s32)
-  ; GCN:   $vgpr25 = COPY [[COPY25]](s32)
-  ; GCN:   $vgpr26 = COPY [[COPY26]](s32)
-  ; GCN:   $vgpr27 = COPY [[COPY27]](s32)
-  ; GCN:   $vgpr28 = COPY [[COPY28]](s32)
-  ; GCN:   $vgpr29 = COPY [[COPY29]](s32)
-  ; GCN:   $vgpr30 = COPY [[COPY30]](s32)
-  ; GCN:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
-  ; GCN:   G_STORE [[LOAD]](s32), [[FRAME_INDEX3]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; GCN:   G_STORE [[LOAD1]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
-  ; GCN:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; GCN:   G_STORE [[LOAD2]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
-  ; GCN:   [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; GCN-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; GCN-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; GCN-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; GCN-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; GCN-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; GCN-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
+  ; GCN-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
+  ; GCN-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+  ; GCN-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
+  ; GCN-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
+  ; GCN-NEXT:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN-NEXT:   $vgpr1 = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   $vgpr2 = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   $vgpr3 = COPY [[COPY3]](s32)
+  ; GCN-NEXT:   $vgpr4 = COPY [[COPY4]](s32)
+  ; GCN-NEXT:   $vgpr5 = COPY [[COPY5]](s32)
+  ; GCN-NEXT:   $vgpr6 = COPY [[COPY6]](s32)
+  ; GCN-NEXT:   $vgpr7 = COPY [[COPY7]](s32)
+  ; GCN-NEXT:   $vgpr8 = COPY [[COPY8]](s32)
+  ; GCN-NEXT:   $vgpr9 = COPY [[COPY9]](s32)
+  ; GCN-NEXT:   $vgpr10 = COPY [[COPY10]](s32)
+  ; GCN-NEXT:   $vgpr11 = COPY [[COPY11]](s32)
+  ; GCN-NEXT:   $vgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $vgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $vgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr15 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $vgpr16 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $vgpr17 = COPY [[COPY17]](s32)
+  ; GCN-NEXT:   $vgpr18 = COPY [[COPY18]](s32)
+  ; GCN-NEXT:   $vgpr19 = COPY [[COPY19]](s32)
+  ; GCN-NEXT:   $vgpr20 = COPY [[COPY20]](s32)
+  ; GCN-NEXT:   $vgpr21 = COPY [[COPY21]](s32)
+  ; GCN-NEXT:   $vgpr22 = COPY [[COPY22]](s32)
+  ; GCN-NEXT:   $vgpr23 = COPY [[COPY23]](s32)
+  ; GCN-NEXT:   $vgpr24 = COPY [[COPY24]](s32)
+  ; GCN-NEXT:   $vgpr25 = COPY [[COPY25]](s32)
+  ; GCN-NEXT:   $vgpr26 = COPY [[COPY26]](s32)
+  ; GCN-NEXT:   $vgpr27 = COPY [[COPY27]](s32)
+  ; GCN-NEXT:   $vgpr28 = COPY [[COPY28]](s32)
+  ; GCN-NEXT:   $vgpr29 = COPY [[COPY29]](s32)
+  ; GCN-NEXT:   $vgpr30 = COPY [[COPY30]](s32)
+  ; GCN-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+  ; GCN-NEXT:   G_STORE [[LOAD]](s32), [[FRAME_INDEX3]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; GCN-NEXT:   G_STORE [[LOAD1]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; GCN-NEXT:   G_STORE [[LOAD2]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
+  ; GCN-NEXT:   [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+  ; GCN-NEXT:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c)
   ret i32 %ret
@@ -415,91 +427,92 @@ entry:
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i32 %b, [32 x i32] %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_a32i32_stack_object
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; GCN:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; GCN:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; GCN:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; GCN:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; GCN:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; GCN:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
-  ; GCN:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
-  ; GCN:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
-  ; GCN:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
-  ; GCN:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
-  ; GCN:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
-  ; GCN:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
-  ; GCN:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32)
-  ; GCN:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
-  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
-  ; GCN:   $vgpr2 = COPY [[COPY2]](s32)
-  ; GCN:   $vgpr3 = COPY [[COPY3]](s32)
-  ; GCN:   $vgpr4 = COPY [[COPY4]](s32)
-  ; GCN:   $vgpr5 = COPY [[COPY5]](s32)
-  ; GCN:   $vgpr6 = COPY [[COPY6]](s32)
-  ; GCN:   $vgpr7 = COPY [[COPY7]](s32)
-  ; GCN:   $vgpr8 = COPY [[COPY8]](s32)
-  ; GCN:   $vgpr9 = COPY [[COPY9]](s32)
-  ; GCN:   $vgpr10 = COPY [[COPY10]](s32)
-  ; GCN:   $vgpr11 = COPY [[COPY11]](s32)
-  ; GCN:   $vgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $vgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $vgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr15 = COPY [[COPY15]](s32)
-  ; GCN:   $vgpr16 = COPY [[COPY16]](s32)
-  ; GCN:   $vgpr17 = COPY [[COPY17]](s32)
-  ; GCN:   $vgpr18 = COPY [[COPY18]](s32)
-  ; GCN:   $vgpr19 = COPY [[COPY19]](s32)
-  ; GCN:   $vgpr20 = COPY [[COPY20]](s32)
-  ; GCN:   $vgpr21 = COPY [[COPY21]](s32)
-  ; GCN:   $vgpr22 = COPY [[COPY22]](s32)
-  ; GCN:   $vgpr23 = COPY [[COPY23]](s32)
-  ; GCN:   $vgpr24 = COPY [[COPY24]](s32)
-  ; GCN:   $vgpr25 = COPY [[COPY25]](s32)
-  ; GCN:   $vgpr26 = COPY [[COPY26]](s32)
-  ; GCN:   $vgpr27 = COPY [[COPY27]](s32)
-  ; GCN:   $vgpr28 = COPY [[COPY28]](s32)
-  ; GCN:   $vgpr29 = COPY [[COPY29]](s32)
-  ; GCN:   $vgpr30 = COPY [[COPY30]](s32)
-  ; GCN:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
-  ; GCN:   G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; GCN:   G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
-  ; GCN:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; GCN:   G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
-  ; GCN:   [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; GCN-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; GCN-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; GCN-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; GCN-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; GCN-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; GCN-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
+  ; GCN-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
+  ; GCN-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+  ; GCN-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
+  ; GCN-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+  ; GCN-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32)
+  ; GCN-NEXT:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
+  ; GCN-NEXT:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN-NEXT:   $vgpr1 = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   $vgpr2 = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   $vgpr3 = COPY [[COPY3]](s32)
+  ; GCN-NEXT:   $vgpr4 = COPY [[COPY4]](s32)
+  ; GCN-NEXT:   $vgpr5 = COPY [[COPY5]](s32)
+  ; GCN-NEXT:   $vgpr6 = COPY [[COPY6]](s32)
+  ; GCN-NEXT:   $vgpr7 = COPY [[COPY7]](s32)
+  ; GCN-NEXT:   $vgpr8 = COPY [[COPY8]](s32)
+  ; GCN-NEXT:   $vgpr9 = COPY [[COPY9]](s32)
+  ; GCN-NEXT:   $vgpr10 = COPY [[COPY10]](s32)
+  ; GCN-NEXT:   $vgpr11 = COPY [[COPY11]](s32)
+  ; GCN-NEXT:   $vgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $vgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $vgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr15 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $vgpr16 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $vgpr17 = COPY [[COPY17]](s32)
+  ; GCN-NEXT:   $vgpr18 = COPY [[COPY18]](s32)
+  ; GCN-NEXT:   $vgpr19 = COPY [[COPY19]](s32)
+  ; GCN-NEXT:   $vgpr20 = COPY [[COPY20]](s32)
+  ; GCN-NEXT:   $vgpr21 = COPY [[COPY21]](s32)
+  ; GCN-NEXT:   $vgpr22 = COPY [[COPY22]](s32)
+  ; GCN-NEXT:   $vgpr23 = COPY [[COPY23]](s32)
+  ; GCN-NEXT:   $vgpr24 = COPY [[COPY24]](s32)
+  ; GCN-NEXT:   $vgpr25 = COPY [[COPY25]](s32)
+  ; GCN-NEXT:   $vgpr26 = COPY [[COPY26]](s32)
+  ; GCN-NEXT:   $vgpr27 = COPY [[COPY27]](s32)
+  ; GCN-NEXT:   $vgpr28 = COPY [[COPY28]](s32)
+  ; GCN-NEXT:   $vgpr29 = COPY [[COPY29]](s32)
+  ; GCN-NEXT:   $vgpr30 = COPY [[COPY30]](s32)
+  ; GCN-NEXT:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+  ; GCN-NEXT:   G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; GCN-NEXT:   G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; GCN-NEXT:   G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
+  ; GCN-NEXT:   [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+  ; GCN-NEXT:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %alloca = alloca [16 x i32], align 4, addrspace(5)
   %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5
@@ -514,62 +527,63 @@ entry:
 define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
   ; GCN-LABEL: name: no_sibling_call_callee_more_stack_space
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
-  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
-  ; GCN:   $vgpr2 = COPY [[C]](s32)
-  ; GCN:   $vgpr3 = COPY [[C]](s32)
-  ; GCN:   $vgpr4 = COPY [[C]](s32)
-  ; GCN:   $vgpr5 = COPY [[C]](s32)
-  ; GCN:   $vgpr6 = COPY [[C]](s32)
-  ; GCN:   $vgpr7 = COPY [[C]](s32)
-  ; GCN:   $vgpr8 = COPY [[C]](s32)
-  ; GCN:   $vgpr9 = COPY [[C]](s32)
-  ; GCN:   $vgpr10 = COPY [[C]](s32)
-  ; GCN:   $vgpr11 = COPY [[C]](s32)
-  ; GCN:   $vgpr12 = COPY [[C]](s32)
-  ; GCN:   $vgpr13 = COPY [[C]](s32)
-  ; GCN:   $vgpr14 = COPY [[C]](s32)
-  ; GCN:   $vgpr15 = COPY [[C]](s32)
-  ; GCN:   $vgpr16 = COPY [[C]](s32)
-  ; GCN:   $vgpr17 = COPY [[C]](s32)
-  ; GCN:   $vgpr18 = COPY [[C]](s32)
-  ; GCN:   $vgpr19 = COPY [[C]](s32)
-  ; GCN:   $vgpr20 = COPY [[C]](s32)
-  ; GCN:   $vgpr21 = COPY [[C]](s32)
-  ; GCN:   $vgpr22 = COPY [[C]](s32)
-  ; GCN:   $vgpr23 = COPY [[C]](s32)
-  ; GCN:   $vgpr24 = COPY [[C]](s32)
-  ; GCN:   $vgpr25 = COPY [[C]](s32)
-  ; GCN:   $vgpr26 = COPY [[C]](s32)
-  ; GCN:   $vgpr27 = COPY [[C]](s32)
-  ; GCN:   $vgpr28 = COPY [[C]](s32)
-  ; GCN:   $vgpr29 = COPY [[C]](s32)
-  ; GCN:   $vgpr30 = COPY [[C]](s32)
-  ; GCN:   [[COPY3:%[0-9]+]]:_(p5) = COPY $sgpr32
-  ; GCN:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C1]](s32)
-  ; GCN:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; GCN:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C2]](s32)
-  ; GCN:   G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; GCN:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C3]](s32)
-  ; GCN:   G_STORE [[C]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
-  ; GCN:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
-  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   ADJCALLSTACKDOWN 0, 12, implicit-def $scc
-  ; GCN:   $vgpr0 = COPY [[COPY5]](s32)
-  ; GCN:   [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; GCN:   S_SETPC_B64_return [[COPY6]], implicit $vgpr0
+  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
+  ; GCN-NEXT:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN-NEXT:   $vgpr1 = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   $vgpr2 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr3 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr4 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr5 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr6 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr7 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr8 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr9 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr10 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr11 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr12 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr13 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr14 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr15 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr16 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr17 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr18 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr19 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr20 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr21 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr22 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr23 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr24 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr25 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr26 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr27 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr28 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr29 = COPY [[C]](s32)
+  ; GCN-NEXT:   $vgpr30 = COPY [[C]](s32)
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(p5) = COPY $sgpr32
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C1]](s32)
+  ; GCN-NEXT:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; GCN-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C2]](s32)
+  ; GCN-NEXT:   G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; GCN-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C3]](s32)
+  ; GCN-NEXT:   G_STORE [[C]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 12, implicit-def $scc
+  ; GCN-NEXT:   $vgpr0 = COPY [[COPY5]](s32)
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; GCN-NEXT:   S_SETPC_B64_return [[COPY6]], implicit $vgpr0
 entry:
   %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] zeroinitializer)
   ret i32 %ret
@@ -579,27 +593,28 @@ entry:
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i32 %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_other_call
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
-  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
-  ; GCN:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
-  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   [[GV1:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @sibling_call_i32_fastcc_i32_i32
-  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
-  ; GCN:   $vgpr2 = COPY [[COPY5]](s32)
-  ; GCN:   [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>)
-  ; GCN:   SI_TCRETURN [[GV1]](p0), @sibling_call_i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
+  ; GCN-NEXT:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN-NEXT:   $vgpr1 = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; GCN-NEXT:   [[GV1:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @sibling_call_i32_fastcc_i32_i32
+  ; GCN-NEXT:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN-NEXT:   $vgpr1 = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   $vgpr2 = COPY [[COPY5]](s32)
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>)
+  ; GCN-NEXT:   SI_TCRETURN [[GV1]](p0), @sibling_call_i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %other.call = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b)
   %ret = tail call fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %other.call)
@@ -611,91 +626,92 @@ entry:
 define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 {
   ; GCN-LABEL: name: sibling_call_stack_objecti32_fastcc_i32_i32_a32i32
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; GCN:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; GCN:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; GCN:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; GCN:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; GCN:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; GCN:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
-  ; GCN:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
-  ; GCN:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
-  ; GCN:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
-  ; GCN:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
-  ; GCN:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
-  ; GCN:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
-  ; GCN:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32)
-  ; GCN:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
-  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
-  ; GCN:   $vgpr2 = COPY [[COPY2]](s32)
-  ; GCN:   $vgpr3 = COPY [[COPY3]](s32)
-  ; GCN:   $vgpr4 = COPY [[COPY4]](s32)
-  ; GCN:   $vgpr5 = COPY [[COPY5]](s32)
-  ; GCN:   $vgpr6 = COPY [[COPY6]](s32)
-  ; GCN:   $vgpr7 = COPY [[COPY7]](s32)
-  ; GCN:   $vgpr8 = COPY [[COPY8]](s32)
-  ; GCN:   $vgpr9 = COPY [[COPY9]](s32)
-  ; GCN:   $vgpr10 = COPY [[COPY10]](s32)
-  ; GCN:   $vgpr11 = COPY [[COPY11]](s32)
-  ; GCN:   $vgpr12 = COPY [[COPY12]](s32)
-  ; GCN:   $vgpr13 = COPY [[COPY13]](s32)
-  ; GCN:   $vgpr14 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr15 = COPY [[COPY15]](s32)
-  ; GCN:   $vgpr16 = COPY [[COPY16]](s32)
-  ; GCN:   $vgpr17 = COPY [[COPY17]](s32)
-  ; GCN:   $vgpr18 = COPY [[COPY18]](s32)
-  ; GCN:   $vgpr19 = COPY [[COPY19]](s32)
-  ; GCN:   $vgpr20 = COPY [[COPY20]](s32)
-  ; GCN:   $vgpr21 = COPY [[COPY21]](s32)
-  ; GCN:   $vgpr22 = COPY [[COPY22]](s32)
-  ; GCN:   $vgpr23 = COPY [[COPY23]](s32)
-  ; GCN:   $vgpr24 = COPY [[COPY24]](s32)
-  ; GCN:   $vgpr25 = COPY [[COPY25]](s32)
-  ; GCN:   $vgpr26 = COPY [[COPY26]](s32)
-  ; GCN:   $vgpr27 = COPY [[COPY27]](s32)
-  ; GCN:   $vgpr28 = COPY [[COPY28]](s32)
-  ; GCN:   $vgpr29 = COPY [[COPY29]](s32)
-  ; GCN:   $vgpr30 = COPY [[COPY30]](s32)
-  ; GCN:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
-  ; GCN:   G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; GCN:   G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
-  ; GCN:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; GCN:   G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
-  ; GCN:   [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; GCN-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; GCN-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; GCN-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; GCN-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; GCN-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; GCN-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
+  ; GCN-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
+  ; GCN-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+  ; GCN-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
+  ; GCN-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+  ; GCN-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32)
+  ; GCN-NEXT:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
+  ; GCN-NEXT:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN-NEXT:   $vgpr1 = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   $vgpr2 = COPY [[COPY2]](s32)
+  ; GCN-NEXT:   $vgpr3 = COPY [[COPY3]](s32)
+  ; GCN-NEXT:   $vgpr4 = COPY [[COPY4]](s32)
+  ; GCN-NEXT:   $vgpr5 = COPY [[COPY5]](s32)
+  ; GCN-NEXT:   $vgpr6 = COPY [[COPY6]](s32)
+  ; GCN-NEXT:   $vgpr7 = COPY [[COPY7]](s32)
+  ; GCN-NEXT:   $vgpr8 = COPY [[COPY8]](s32)
+  ; GCN-NEXT:   $vgpr9 = COPY [[COPY9]](s32)
+  ; GCN-NEXT:   $vgpr10 = COPY [[COPY10]](s32)
+  ; GCN-NEXT:   $vgpr11 = COPY [[COPY11]](s32)
+  ; GCN-NEXT:   $vgpr12 = COPY [[COPY12]](s32)
+  ; GCN-NEXT:   $vgpr13 = COPY [[COPY13]](s32)
+  ; GCN-NEXT:   $vgpr14 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $vgpr15 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $vgpr16 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $vgpr17 = COPY [[COPY17]](s32)
+  ; GCN-NEXT:   $vgpr18 = COPY [[COPY18]](s32)
+  ; GCN-NEXT:   $vgpr19 = COPY [[COPY19]](s32)
+  ; GCN-NEXT:   $vgpr20 = COPY [[COPY20]](s32)
+  ; GCN-NEXT:   $vgpr21 = COPY [[COPY21]](s32)
+  ; GCN-NEXT:   $vgpr22 = COPY [[COPY22]](s32)
+  ; GCN-NEXT:   $vgpr23 = COPY [[COPY23]](s32)
+  ; GCN-NEXT:   $vgpr24 = COPY [[COPY24]](s32)
+  ; GCN-NEXT:   $vgpr25 = COPY [[COPY25]](s32)
+  ; GCN-NEXT:   $vgpr26 = COPY [[COPY26]](s32)
+  ; GCN-NEXT:   $vgpr27 = COPY [[COPY27]](s32)
+  ; GCN-NEXT:   $vgpr28 = COPY [[COPY28]](s32)
+  ; GCN-NEXT:   $vgpr29 = COPY [[COPY29]](s32)
+  ; GCN-NEXT:   $vgpr30 = COPY [[COPY30]](s32)
+  ; GCN-NEXT:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+  ; GCN-NEXT:   G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; GCN-NEXT:   G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; GCN-NEXT:   G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
+  ; GCN-NEXT:   [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+  ; GCN-NEXT:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %alloca = alloca [16 x i32], align 4, addrspace(5)
   %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5
@@ -707,100 +723,101 @@ entry:
 define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area(i32 %a, i32 %b, [36 x i32] %c) #1 {
   ; GCN-LABEL: name: sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; GCN:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; GCN:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; GCN:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; GCN:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; GCN:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; GCN:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9
-  ; GCN:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.9, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
-  ; GCN:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.8, addrspace 5)
-  ; GCN:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
-  ; GCN:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.7, align 8, addrspace 5)
-  ; GCN:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
-  ; GCN:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.6, addrspace 5)
-  ; GCN:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
-  ; GCN:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
-  ; GCN:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
-  ; GCN:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
-  ; GCN:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
-  ; GCN:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
-  ; GCN:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GCN:   [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX7]], [[C2]](s32)
-  ; GCN:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
-  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
-  ; GCN:   $vgpr2 = COPY [[C1]](s32)
-  ; GCN:   $vgpr3 = COPY [[C1]](s32)
-  ; GCN:   $vgpr4 = COPY [[C1]](s32)
-  ; GCN:   $vgpr5 = COPY [[C1]](s32)
-  ; GCN:   $vgpr6 = COPY [[C1]](s32)
-  ; GCN:   $vgpr7 = COPY [[C1]](s32)
-  ; GCN:   $vgpr8 = COPY [[C1]](s32)
-  ; GCN:   $vgpr9 = COPY [[C1]](s32)
-  ; GCN:   $vgpr10 = COPY [[C1]](s32)
-  ; GCN:   $vgpr11 = COPY [[C1]](s32)
-  ; GCN:   $vgpr12 = COPY [[C1]](s32)
-  ; GCN:   $vgpr13 = COPY [[C1]](s32)
-  ; GCN:   $vgpr14 = COPY [[C1]](s32)
-  ; GCN:   $vgpr15 = COPY [[C1]](s32)
-  ; GCN:   $vgpr16 = COPY [[C1]](s32)
-  ; GCN:   $vgpr17 = COPY [[C1]](s32)
-  ; GCN:   $vgpr18 = COPY [[C1]](s32)
-  ; GCN:   $vgpr19 = COPY [[C1]](s32)
-  ; GCN:   $vgpr20 = COPY [[C1]](s32)
-  ; GCN:   $vgpr21 = COPY [[C1]](s32)
-  ; GCN:   $vgpr22 = COPY [[C1]](s32)
-  ; GCN:   $vgpr23 = COPY [[C1]](s32)
-  ; GCN:   $vgpr24 = COPY [[C1]](s32)
-  ; GCN:   $vgpr25 = COPY [[C1]](s32)
-  ; GCN:   $vgpr26 = COPY [[C1]](s32)
-  ; GCN:   $vgpr27 = COPY [[C1]](s32)
-  ; GCN:   $vgpr28 = COPY [[C1]](s32)
-  ; GCN:   $vgpr29 = COPY [[C1]](s32)
-  ; GCN:   $vgpr30 = COPY [[C1]](s32)
-  ; GCN:   [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
-  ; GCN:   G_STORE [[C1]](s32), [[FRAME_INDEX8]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; GCN:   G_STORE [[C1]](s32), [[FRAME_INDEX9]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
-  ; GCN:   [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; GCN:   G_STORE [[C1]](s32), [[FRAME_INDEX10]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
-  ; GCN:   [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; GCN-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; GCN-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; GCN-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; GCN-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; GCN-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; GCN-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9
+  ; GCN-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.9, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
+  ; GCN-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.8, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
+  ; GCN-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.7, align 8, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
+  ; GCN-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.6, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
+  ; GCN-NEXT:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
+  ; GCN-NEXT:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+  ; GCN-NEXT:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
+  ; GCN-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; GCN-NEXT:   [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX7]], [[C2]](s32)
+  ; GCN-NEXT:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
+  ; GCN-NEXT:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN-NEXT:   $vgpr1 = COPY [[COPY1]](s32)
+  ; GCN-NEXT:   $vgpr2 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr3 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr4 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr5 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr6 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr7 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr8 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr9 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr10 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr11 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr12 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr13 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr14 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr15 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr16 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr17 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr18 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr19 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr20 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr21 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr22 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr23 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr24 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr25 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr26 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr27 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr28 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr29 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr30 = COPY [[C1]](s32)
+  ; GCN-NEXT:   [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+  ; GCN-NEXT:   G_STORE [[C1]](s32), [[FRAME_INDEX8]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; GCN-NEXT:   G_STORE [[C1]](s32), [[FRAME_INDEX9]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; GCN-NEXT:   G_STORE [[C1]](s32), [[FRAME_INDEX10]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
+  ; GCN-NEXT:   [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+  ; GCN-NEXT:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %alloca = alloca [16 x i32], align 4, addrspace(5)
   %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5
@@ -814,156 +831,157 @@ declare hidden void @void_fastcc_multi_byval(i32 %a, [3 x i32] addrspace(5)* byv
 define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
   ; GCN-LABEL: name: sibling_call_fastcc_multi_byval
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; GCN:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; GCN:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; GCN:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; GCN:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; GCN:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; GCN:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; GCN:   [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; GCN:   [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; GCN:   [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; GCN:   [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; GCN:   [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; GCN:   [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; GCN:   [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; GCN:   [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.35
-  ; GCN:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.35, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.34
-  ; GCN:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.34, addrspace 5)
-  ; GCN:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.33
-  ; GCN:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.33, align 8, addrspace 5)
-  ; GCN:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.32
-  ; GCN:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.32, addrspace 5)
-  ; GCN:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31
-  ; GCN:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.31, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30
-  ; GCN:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.30, addrspace 5)
-  ; GCN:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.29
-  ; GCN:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.29, align 8, addrspace 5)
-  ; GCN:   [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.28
-  ; GCN:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.28, addrspace 5)
-  ; GCN:   [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.27
-  ; GCN:   [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.27, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.26
-  ; GCN:   [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.26, addrspace 5)
-  ; GCN:   [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.25
-  ; GCN:   [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.25, align 8, addrspace 5)
-  ; GCN:   [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.24
-  ; GCN:   [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.24, addrspace 5)
-  ; GCN:   [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.23
-  ; GCN:   [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.23, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.22
-  ; GCN:   [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.22, addrspace 5)
-  ; GCN:   [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.21
-  ; GCN:   [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.21, align 8, addrspace 5)
-  ; GCN:   [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.20
-  ; GCN:   [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.20, addrspace 5)
-  ; GCN:   [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.19
-  ; GCN:   [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.19, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX17:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.18
-  ; GCN:   [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load (s32) from %fixed-stack.18, addrspace 5)
-  ; GCN:   [[FRAME_INDEX18:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.17
-  ; GCN:   [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load (s32) from %fixed-stack.17, align 8, addrspace 5)
-  ; GCN:   [[FRAME_INDEX19:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16
-  ; GCN:   [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load (s32) from %fixed-stack.16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX20:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15
-  ; GCN:   [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load (s32) from %fixed-stack.15, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX21:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14
-  ; GCN:   [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load (s32) from %fixed-stack.14, addrspace 5)
-  ; GCN:   [[FRAME_INDEX22:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13
-  ; GCN:   [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load (s32) from %fixed-stack.13, align 8, addrspace 5)
-  ; GCN:   [[FRAME_INDEX23:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12
-  ; GCN:   [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load (s32) from %fixed-stack.12, addrspace 5)
-  ; GCN:   [[FRAME_INDEX24:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11
-  ; GCN:   [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load (s32) from %fixed-stack.11, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX25:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10
-  ; GCN:   [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load (s32) from %fixed-stack.10, addrspace 5)
-  ; GCN:   [[FRAME_INDEX26:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9
-  ; GCN:   [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load (s32) from %fixed-stack.9, align 8, addrspace 5)
-  ; GCN:   [[FRAME_INDEX27:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
-  ; GCN:   [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load (s32) from %fixed-stack.8, addrspace 5)
-  ; GCN:   [[FRAME_INDEX28:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
-  ; GCN:   [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load (s32) from %fixed-stack.7, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX29:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
-  ; GCN:   [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load (s32) from %fixed-stack.6, addrspace 5)
-  ; GCN:   [[FRAME_INDEX30:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
-  ; GCN:   [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load (s32) from %fixed-stack.5, align 8, addrspace 5)
-  ; GCN:   [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
-  ; GCN:   [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
-  ; GCN:   [[FRAME_INDEX32:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
-  ; GCN:   [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX33:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
-  ; GCN:   [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5)
-  ; GCN:   [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
-  ; GCN:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; GCN:   [[FRAME_INDEX34:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca0
-  ; GCN:   [[FRAME_INDEX35:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.1.alloca1
-  ; GCN:   G_STORE [[C]](s32), [[FRAME_INDEX34]](p5) :: (store (s32) into %ir.alloca0, addrspace 5)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C2]](s32)
-  ; GCN:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.alloca0 + 4, addrspace 5)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; GCN:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C3]](s32)
-  ; GCN:   G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into %ir.alloca0 + 8, addrspace 5)
-  ; GCN:   G_STORE [[C1]](s64), [[FRAME_INDEX35]](p5) :: (store (s64) into %ir.alloca1, addrspace 5)
-  ; GCN:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX35]], [[C3]](s32)
-  ; GCN:   G_STORE [[C1]](s64), [[PTR_ADD2]](p5) :: (store (s64) into %ir.alloca1 + 8, addrspace 5)
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_multi_byval
-  ; GCN:   [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   $vgpr0 = COPY [[COPY8]](s32)
-  ; GCN:   [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; GCN:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-  ; GCN:   G_MEMCPY [[FRAME_INDEX36]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.1, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca0, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; GCN:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; GCN:   G_MEMCPY [[FRAME_INDEX37]](p5), [[FRAME_INDEX35]](p5), [[C5]](s32), 0 :: (dereferenceable store (s128) into %fixed-stack.0, addrspace 5), (dereferenceable load (s128) from %ir.alloca1, align 8, addrspace 5)
-  ; GCN:   [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY40]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY41]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY42]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY43]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY44]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY45]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY46]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY47]](s32)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @void_fastcc_multi_byval, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; GCN-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; GCN-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; GCN-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; GCN-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; GCN-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; GCN-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; GCN-NEXT:   [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; GCN-NEXT:   [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; GCN-NEXT:   [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; GCN-NEXT:   [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; GCN-NEXT:   [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; GCN-NEXT:   [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; GCN-NEXT:   [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; GCN-NEXT:   [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.35
+  ; GCN-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.35, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.34
+  ; GCN-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.34, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.33
+  ; GCN-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.33, align 8, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.32
+  ; GCN-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.32, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31
+  ; GCN-NEXT:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.31, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30
+  ; GCN-NEXT:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.30, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.29
+  ; GCN-NEXT:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.29, align 8, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.28
+  ; GCN-NEXT:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.28, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.27
+  ; GCN-NEXT:   [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.27, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.26
+  ; GCN-NEXT:   [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.26, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.25
+  ; GCN-NEXT:   [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.25, align 8, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.24
+  ; GCN-NEXT:   [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.24, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.23
+  ; GCN-NEXT:   [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.23, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.22
+  ; GCN-NEXT:   [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.22, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.21
+  ; GCN-NEXT:   [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.21, align 8, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.20
+  ; GCN-NEXT:   [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.20, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.19
+  ; GCN-NEXT:   [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.19, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX17:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.18
+  ; GCN-NEXT:   [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load (s32) from %fixed-stack.18, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX18:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.17
+  ; GCN-NEXT:   [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load (s32) from %fixed-stack.17, align 8, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX19:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16
+  ; GCN-NEXT:   [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load (s32) from %fixed-stack.16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX20:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15
+  ; GCN-NEXT:   [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load (s32) from %fixed-stack.15, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX21:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14
+  ; GCN-NEXT:   [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load (s32) from %fixed-stack.14, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX22:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13
+  ; GCN-NEXT:   [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load (s32) from %fixed-stack.13, align 8, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX23:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12
+  ; GCN-NEXT:   [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load (s32) from %fixed-stack.12, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX24:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11
+  ; GCN-NEXT:   [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load (s32) from %fixed-stack.11, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX25:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10
+  ; GCN-NEXT:   [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load (s32) from %fixed-stack.10, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX26:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9
+  ; GCN-NEXT:   [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load (s32) from %fixed-stack.9, align 8, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX27:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
+  ; GCN-NEXT:   [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load (s32) from %fixed-stack.8, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX28:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
+  ; GCN-NEXT:   [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load (s32) from %fixed-stack.7, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX29:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
+  ; GCN-NEXT:   [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load (s32) from %fixed-stack.6, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX30:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
+  ; GCN-NEXT:   [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load (s32) from %fixed-stack.5, align 8, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
+  ; GCN-NEXT:   [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX32:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+  ; GCN-NEXT:   [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX33:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+  ; GCN-NEXT:   [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5)
+  ; GCN-NEXT:   [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; GCN-NEXT:   [[FRAME_INDEX34:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca0
+  ; GCN-NEXT:   [[FRAME_INDEX35:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.1.alloca1
+  ; GCN-NEXT:   G_STORE [[C]](s32), [[FRAME_INDEX34]](p5) :: (store (s32) into %ir.alloca0, addrspace 5)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C2]](s32)
+  ; GCN-NEXT:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.alloca0 + 4, addrspace 5)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; GCN-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C3]](s32)
+  ; GCN-NEXT:   G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into %ir.alloca0 + 8, addrspace 5)
+  ; GCN-NEXT:   G_STORE [[C1]](s64), [[FRAME_INDEX35]](p5) :: (store (s64) into %ir.alloca1, addrspace 5)
+  ; GCN-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX35]], [[C3]](s32)
+  ; GCN-NEXT:   G_STORE [[C1]](s64), [[PTR_ADD2]](p5) :: (store (s64) into %ir.alloca1 + 8, addrspace 5)
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_multi_byval
+  ; GCN-NEXT:   [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GCN-NEXT:   [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; GCN-NEXT:   [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   $vgpr0 = COPY [[COPY8]](s32)
+  ; GCN-NEXT:   [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; GCN-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+  ; GCN-NEXT:   G_MEMCPY [[FRAME_INDEX36]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.1, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca0, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; GCN-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; GCN-NEXT:   G_MEMCPY [[FRAME_INDEX37]](p5), [[FRAME_INDEX35]](p5), [[C5]](s32), 0 :: (dereferenceable store (s128) into %fixed-stack.0, addrspace 5), (dereferenceable load (s128) from %ir.alloca1, align 8, addrspace 5)
+  ; GCN-NEXT:   [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY40]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY41]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[COPY42]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY43]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY44]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY45]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY46]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[COPY47]](s32)
+  ; GCN-NEXT:   SI_TCRETURN [[GV]](p0), @void_fastcc_multi_byval, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 entry:
   %alloca0 = alloca [3 x i32], align 16, addrspace(5)
   %alloca1 = alloca [2 x i64], align 8, addrspace(5)
@@ -979,183 +997,184 @@ declare hidden void @void_fastcc_byval_and_stack_passed([3 x i32] addrspace(5)*
 define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 {
   ; GCN-LABEL: name: sibling_call_byval_and_stack_passed
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; GCN:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; GCN:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; GCN:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; GCN:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; GCN:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; GCN:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; GCN:   [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; GCN:   [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; GCN:   [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; GCN:   [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; GCN:   [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; GCN:   [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; GCN:   [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; GCN:   [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.36
-  ; GCN:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.36, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.35
-  ; GCN:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.35, addrspace 5)
-  ; GCN:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.34
-  ; GCN:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.34, align 8, addrspace 5)
-  ; GCN:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.33
-  ; GCN:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.33, addrspace 5)
-  ; GCN:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.32
-  ; GCN:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.32, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31
-  ; GCN:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.31, addrspace 5)
-  ; GCN:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30
-  ; GCN:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.30, align 8, addrspace 5)
-  ; GCN:   [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.29
-  ; GCN:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.29, addrspace 5)
-  ; GCN:   [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.28
-  ; GCN:   [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.28, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.27
-  ; GCN:   [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.27, addrspace 5)
-  ; GCN:   [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.26
-  ; GCN:   [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.26, align 8, addrspace 5)
-  ; GCN:   [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.25
-  ; GCN:   [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.25, addrspace 5)
-  ; GCN:   [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.24
-  ; GCN:   [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.24, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.23
-  ; GCN:   [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.23, addrspace 5)
-  ; GCN:   [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.22
-  ; GCN:   [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.22, align 8, addrspace 5)
-  ; GCN:   [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.21
-  ; GCN:   [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.21, addrspace 5)
-  ; GCN:   [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.20
-  ; GCN:   [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.20, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX17:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.19
-  ; GCN:   [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load (s32) from %fixed-stack.19, addrspace 5)
-  ; GCN:   [[FRAME_INDEX18:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.18
-  ; GCN:   [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load (s32) from %fixed-stack.18, align 8, addrspace 5)
-  ; GCN:   [[FRAME_INDEX19:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.17
-  ; GCN:   [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load (s32) from %fixed-stack.17, addrspace 5)
-  ; GCN:   [[FRAME_INDEX20:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16
-  ; GCN:   [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX21:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15
-  ; GCN:   [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load (s32) from %fixed-stack.15, addrspace 5)
-  ; GCN:   [[FRAME_INDEX22:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14
-  ; GCN:   [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load (s32) from %fixed-stack.14, align 8, addrspace 5)
-  ; GCN:   [[FRAME_INDEX23:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13
-  ; GCN:   [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load (s32) from %fixed-stack.13, addrspace 5)
-  ; GCN:   [[FRAME_INDEX24:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12
-  ; GCN:   [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load (s32) from %fixed-stack.12, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX25:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11
-  ; GCN:   [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load (s32) from %fixed-stack.11, addrspace 5)
-  ; GCN:   [[FRAME_INDEX26:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10
-  ; GCN:   [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load (s32) from %fixed-stack.10, align 8, addrspace 5)
-  ; GCN:   [[FRAME_INDEX27:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9
-  ; GCN:   [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load (s32) from %fixed-stack.9, addrspace 5)
-  ; GCN:   [[FRAME_INDEX28:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
-  ; GCN:   [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX29:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
-  ; GCN:   [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5)
-  ; GCN:   [[FRAME_INDEX30:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
-  ; GCN:   [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5)
-  ; GCN:   [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
-  ; GCN:   [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5)
-  ; GCN:   [[FRAME_INDEX32:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
-  ; GCN:   [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5)
-  ; GCN:   [[FRAME_INDEX33:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
-  ; GCN:   [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5)
-  ; GCN:   [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
-  ; GCN:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GCN:   [[FRAME_INDEX34:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
-  ; GCN:   G_STORE [[C]](s32), [[FRAME_INDEX34]](p5) :: (store (s32) into %ir.alloca, addrspace 5)
-  ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C2]](s32)
-  ; GCN:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.alloca + 4, addrspace 5)
-  ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; GCN:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C3]](s32)
-  ; GCN:   G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into %ir.alloca + 8, addrspace 5)
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_byval_and_stack_passed
-  ; GCN:   [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[FRAME_INDEX35:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
-  ; GCN:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-  ; GCN:   G_MEMCPY [[FRAME_INDEX35]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.2, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca, align 16, addrspace 5)
-  ; GCN:   $vgpr0 = COPY [[C1]](s32)
-  ; GCN:   $vgpr1 = COPY [[C1]](s32)
-  ; GCN:   $vgpr2 = COPY [[C1]](s32)
-  ; GCN:   $vgpr3 = COPY [[C1]](s32)
-  ; GCN:   $vgpr4 = COPY [[C1]](s32)
-  ; GCN:   $vgpr5 = COPY [[C1]](s32)
-  ; GCN:   $vgpr6 = COPY [[C1]](s32)
-  ; GCN:   $vgpr7 = COPY [[C1]](s32)
-  ; GCN:   $vgpr8 = COPY [[C1]](s32)
-  ; GCN:   $vgpr9 = COPY [[C1]](s32)
-  ; GCN:   $vgpr10 = COPY [[C1]](s32)
-  ; GCN:   $vgpr11 = COPY [[C1]](s32)
-  ; GCN:   $vgpr12 = COPY [[C1]](s32)
-  ; GCN:   $vgpr13 = COPY [[C1]](s32)
-  ; GCN:   $vgpr14 = COPY [[C1]](s32)
-  ; GCN:   $vgpr15 = COPY [[C1]](s32)
-  ; GCN:   $vgpr16 = COPY [[C1]](s32)
-  ; GCN:   $vgpr17 = COPY [[C1]](s32)
-  ; GCN:   $vgpr18 = COPY [[C1]](s32)
-  ; GCN:   $vgpr19 = COPY [[C1]](s32)
-  ; GCN:   $vgpr20 = COPY [[C1]](s32)
-  ; GCN:   $vgpr21 = COPY [[C1]](s32)
-  ; GCN:   $vgpr22 = COPY [[C1]](s32)
-  ; GCN:   $vgpr23 = COPY [[C1]](s32)
-  ; GCN:   $vgpr24 = COPY [[C1]](s32)
-  ; GCN:   $vgpr25 = COPY [[C1]](s32)
-  ; GCN:   $vgpr26 = COPY [[C1]](s32)
-  ; GCN:   $vgpr27 = COPY [[C1]](s32)
-  ; GCN:   $vgpr28 = COPY [[C1]](s32)
-  ; GCN:   $vgpr29 = COPY [[C1]](s32)
-  ; GCN:   $vgpr30 = COPY [[C1]](s32)
-  ; GCN:   [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; GCN:   G_STORE [[C1]](s32), [[FRAME_INDEX36]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
-  ; GCN:   [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; GCN:   G_STORE [[COPY8]](s32), [[FRAME_INDEX37]](p5) :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
-  ; GCN:   [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY40]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY41]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY42]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY43]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY44]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY45]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY46]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY47]](s32)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @void_fastcc_byval_and_stack_passed, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; GCN-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; GCN-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; GCN-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; GCN-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; GCN-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; GCN-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; GCN-NEXT:   [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; GCN-NEXT:   [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; GCN-NEXT:   [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; GCN-NEXT:   [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; GCN-NEXT:   [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; GCN-NEXT:   [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; GCN-NEXT:   [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; GCN-NEXT:   [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; GCN-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.36
+  ; GCN-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.36, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.35
+  ; GCN-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.35, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.34
+  ; GCN-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.34, align 8, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.33
+  ; GCN-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.33, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.32
+  ; GCN-NEXT:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.32, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31
+  ; GCN-NEXT:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.31, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30
+  ; GCN-NEXT:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.30, align 8, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.29
+  ; GCN-NEXT:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.29, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.28
+  ; GCN-NEXT:   [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.28, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.27
+  ; GCN-NEXT:   [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.27, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.26
+  ; GCN-NEXT:   [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.26, align 8, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.25
+  ; GCN-NEXT:   [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.25, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.24
+  ; GCN-NEXT:   [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.24, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.23
+  ; GCN-NEXT:   [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.23, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.22
+  ; GCN-NEXT:   [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.22, align 8, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.21
+  ; GCN-NEXT:   [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.21, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.20
+  ; GCN-NEXT:   [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.20, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX17:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.19
+  ; GCN-NEXT:   [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load (s32) from %fixed-stack.19, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX18:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.18
+  ; GCN-NEXT:   [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load (s32) from %fixed-stack.18, align 8, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX19:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.17
+  ; GCN-NEXT:   [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load (s32) from %fixed-stack.17, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX20:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16
+  ; GCN-NEXT:   [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX21:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15
+  ; GCN-NEXT:   [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load (s32) from %fixed-stack.15, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX22:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14
+  ; GCN-NEXT:   [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load (s32) from %fixed-stack.14, align 8, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX23:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13
+  ; GCN-NEXT:   [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load (s32) from %fixed-stack.13, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX24:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12
+  ; GCN-NEXT:   [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load (s32) from %fixed-stack.12, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX25:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11
+  ; GCN-NEXT:   [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load (s32) from %fixed-stack.11, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX26:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10
+  ; GCN-NEXT:   [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load (s32) from %fixed-stack.10, align 8, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX27:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9
+  ; GCN-NEXT:   [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load (s32) from %fixed-stack.9, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX28:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
+  ; GCN-NEXT:   [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX29:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
+  ; GCN-NEXT:   [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX30:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
+  ; GCN-NEXT:   [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
+  ; GCN-NEXT:   [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX32:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
+  ; GCN-NEXT:   [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX33:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+  ; GCN-NEXT:   [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5)
+  ; GCN-NEXT:   [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+  ; GCN-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; GCN-NEXT:   [[FRAME_INDEX34:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
+  ; GCN-NEXT:   G_STORE [[C]](s32), [[FRAME_INDEX34]](p5) :: (store (s32) into %ir.alloca, addrspace 5)
+  ; GCN-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; GCN-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C2]](s32)
+  ; GCN-NEXT:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.alloca + 4, addrspace 5)
+  ; GCN-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; GCN-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C3]](s32)
+  ; GCN-NEXT:   G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into %ir.alloca + 8, addrspace 5)
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_byval_and_stack_passed
+  ; GCN-NEXT:   [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GCN-NEXT:   [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; GCN-NEXT:   [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[FRAME_INDEX35:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+  ; GCN-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+  ; GCN-NEXT:   G_MEMCPY [[FRAME_INDEX35]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.2, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca, align 16, addrspace 5)
+  ; GCN-NEXT:   $vgpr0 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr1 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr2 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr3 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr4 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr5 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr6 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr7 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr8 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr9 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr10 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr11 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr12 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr13 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr14 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr15 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr16 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr17 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr18 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr19 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr20 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr21 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr22 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr23 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr24 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr25 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr26 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr27 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr28 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr29 = COPY [[C1]](s32)
+  ; GCN-NEXT:   $vgpr30 = COPY [[C1]](s32)
+  ; GCN-NEXT:   [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; GCN-NEXT:   G_STORE [[C1]](s32), [[FRAME_INDEX36]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
+  ; GCN-NEXT:   [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; GCN-NEXT:   G_STORE [[COPY8]](s32), [[FRAME_INDEX37]](p5) :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+  ; GCN-NEXT:   [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY40]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY41]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[COPY42]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY43]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY44]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY45]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY46]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[COPY47]](s32)
+  ; GCN-NEXT:   SI_TCRETURN [[GV]](p0), @void_fastcc_byval_and_stack_passed, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 entry:
   %alloca = alloca [3 x i32], align 16, addrspace(5)
   store [3 x i32] [i32 9, i32 9, i32 9], [3 x i32] addrspace(5)* %alloca
@@ -1168,42 +1187,43 @@ declare hidden fastcc i64 @i64_fastcc_i64(i64 %arg0)
 define hidden fastcc i64 @sibling_call_i64_fastcc_i64(i64 %a) #1 {
   ; GCN-LABEL: name: sibling_call_i64_fastcc_i64
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
-  ; GCN:   [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i64_fastcc_i64
-  ; GCN:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
-  ; GCN:   $vgpr0 = COPY [[UV]](s32)
-  ; GCN:   $vgpr1 = COPY [[UV1]](s32)
-  ; GCN:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY13]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY15]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY16]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY17]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY18]](s32)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @i64_fastcc_i64, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i64_fastcc_i64
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
+  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[COPY13]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY17]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[COPY18]](s32)
+  ; GCN-NEXT:   SI_TCRETURN [[GV]](p0), @i64_fastcc_i64, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 entry:
   %ret = tail call fastcc i64 @i64_fastcc_i64(i64 %a)
   ret i64 %ret
@@ -1214,42 +1234,43 @@ declare hidden fastcc i8 addrspace(1)* @p1i8_fastcc_p1i8(i8 addrspace(1)* %arg0)
 define hidden fastcc i8 addrspace(1)* @sibling_call_p1i8_fastcc_p1i8(i8 addrspace(1)* %a) #1 {
   ; GCN-LABEL: name: sibling_call_p1i8_fastcc_p1i8
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
-  ; GCN:   [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @p1i8_fastcc_p1i8
-  ; GCN:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1)
-  ; GCN:   $vgpr0 = COPY [[UV]](s32)
-  ; GCN:   $vgpr1 = COPY [[UV1]](s32)
-  ; GCN:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY13]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY15]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY16]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY17]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY18]](s32)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @p1i8_fastcc_p1i8, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @p1i8_fastcc_p1i8
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1)
+  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[COPY13]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY17]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[COPY18]](s32)
+  ; GCN-NEXT:   SI_TCRETURN [[GV]](p0), @p1i8_fastcc_p1i8, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 entry:
   %ret = tail call fastcc i8 addrspace(1)* @p1i8_fastcc_p1i8(i8 addrspace(1)* %a)
   ret i8 addrspace(1)* %ret
@@ -1260,40 +1281,41 @@ declare hidden fastcc i16 @i16_fastcc_i16(i16 %arg0)
 define hidden fastcc i16 @sibling_call_i16_fastcc_i16(i16 %a) #1 {
   ; GCN-LABEL: name: sibling_call_i16_fastcc_i16
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32)
-  ; GCN:   [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i16_fastcc_i16
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
-  ; GCN:   $vgpr0 = COPY [[ANYEXT]](s32)
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY12]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY14]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY15]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY16]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY17]](s32)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @i16_fastcc_i16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32)
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i16_fastcc_i16
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
+  ; GCN-NEXT:   $vgpr0 = COPY [[ANYEXT]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[COPY12]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[COPY17]](s32)
+  ; GCN-NEXT:   SI_TCRETURN [[GV]](p0), @i16_fastcc_i16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 entry:
   %ret = tail call fastcc i16 @i16_fastcc_i16(i16 %a)
   ret i16 %ret
@@ -1304,40 +1326,41 @@ declare hidden fastcc half @f16_fastcc_f16(half %arg0)
 define hidden fastcc half @sibling_call_f16_fastcc_f16(half %a) #1 {
   ; GCN-LABEL: name: sibling_call_f16_fastcc_f16
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32)
-  ; GCN:   [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @f16_fastcc_f16
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
-  ; GCN:   $vgpr0 = COPY [[ANYEXT]](s32)
-  ; GCN:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY12]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY14]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY15]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY16]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY17]](s32)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @f16_fastcc_f16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32)
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @f16_fastcc_f16
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
+  ; GCN-NEXT:   $vgpr0 = COPY [[ANYEXT]](s32)
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[COPY12]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[COPY17]](s32)
+  ; GCN-NEXT:   SI_TCRETURN [[GV]](p0), @f16_fastcc_f16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 entry:
   %ret = tail call fastcc half @f16_fastcc_f16(half %a)
   ret half %ret
@@ -1348,46 +1371,47 @@ declare hidden fastcc <3 x i16> @v3i16_fastcc_v3i16(<3 x i16> %arg0)
 define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1 {
   ; GCN-LABEL: name: sibling_call_v3i16_fastcc_v3i16
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; GCN:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; GCN:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-  ; GCN:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[DEF]](<2 x s16>)
-  ; GCN:   [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-  ; GCN:   [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v3i16_fastcc_v3i16
-  ; GCN:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
-  ; GCN:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<3 x s16>), [[DEF1]](<3 x s16>)
-  ; GCN:   [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
-  ; GCN:   $vgpr0 = COPY [[UV2]](<2 x s16>)
-  ; GCN:   $vgpr1 = COPY [[UV3]](<2 x s16>)
-  ; GCN:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY13]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY15]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY16]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY17]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY18]](s32)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @v3i16_fastcc_v3i16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[DEF]](<2 x s16>)
+  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v3i16_fastcc_v3i16
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+  ; GCN-NEXT:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<3 x s16>), [[DEF1]](<3 x s16>)
+  ; GCN-NEXT:   [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
+  ; GCN-NEXT:   $vgpr0 = COPY [[UV2]](<2 x s16>)
+  ; GCN-NEXT:   $vgpr1 = COPY [[UV3]](<2 x s16>)
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[COPY13]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY17]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[COPY18]](s32)
+  ; GCN-NEXT:   SI_TCRETURN [[GV]](p0), @v3i16_fastcc_v3i16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 entry:
   %ret = tail call fastcc <3 x i16> @v3i16_fastcc_v3i16(<3 x i16> %a)
   ret <3 x i16> %ret
@@ -1398,42 +1422,43 @@ declare hidden fastcc <4 x i16> @v4i16_fastcc_v4i16(<4 x i16> %arg0)
 define hidden fastcc <4 x i16> @sibling_call_v4i16_fastcc_v4i16(<4 x i16> %a) #1 {
   ; GCN-LABEL: name: sibling_call_v4i16_fastcc_v4i16
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; GCN:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; GCN:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>)
-  ; GCN:   [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v4i16_fastcc_v4i16
-  ; GCN:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
-  ; GCN:   $vgpr0 = COPY [[UV]](<2 x s16>)
-  ; GCN:   $vgpr1 = COPY [[UV1]](<2 x s16>)
-  ; GCN:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY13]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY15]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY16]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY17]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY18]](s32)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @v4i16_fastcc_v4i16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+  ; GCN-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>)
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v4i16_fastcc_v4i16
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
+  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
+  ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](<2 x s16>)
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[COPY13]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY15]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY16]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY17]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[COPY18]](s32)
+  ; GCN-NEXT:   SI_TCRETURN [[GV]](p0), @v4i16_fastcc_v4i16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 entry:
   %ret = tail call fastcc <4 x i16> @v4i16_fastcc_v4i16(<4 x i16> %a)
   ret <4 x i16> %ret
@@ -1444,48 +1469,49 @@ declare hidden fastcc <2 x i64> @v2i64_fastcc_v2i64(<2 x i64> %arg0)
 define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) #1 {
   ; GCN-LABEL: name: sibling_call_v2i64_fastcc_v2i64
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
-  ; GCN:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
-  ; GCN:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-  ; GCN:   [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v2i64_fastcc_v2i64
-  ; GCN:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>)
-  ; GCN:   $vgpr0 = COPY [[UV]](s32)
-  ; GCN:   $vgpr1 = COPY [[UV1]](s32)
-  ; GCN:   $vgpr2 = COPY [[UV2]](s32)
-  ; GCN:   $vgpr3 = COPY [[UV3]](s32)
-  ; GCN:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY13]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY14]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY15]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY16]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY17]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY18]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY19]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY20]](s32)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @v2i64_fastcc_v2i64, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+  ; GCN-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+  ; GCN-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v2i64_fastcc_v2i64
+  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; GCN-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>)
+  ; GCN-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; GCN-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; GCN-NEXT:   $vgpr2 = COPY [[UV2]](s32)
+  ; GCN-NEXT:   $vgpr3 = COPY [[UV3]](s32)
+  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+  ; GCN-NEXT:   $sgpr4_sgpr5 = COPY [[COPY13]](p4)
+  ; GCN-NEXT:   $sgpr6_sgpr7 = COPY [[COPY14]](p4)
+  ; GCN-NEXT:   $sgpr8_sgpr9 = COPY [[COPY15]](p4)
+  ; GCN-NEXT:   $sgpr10_sgpr11 = COPY [[COPY16]](s64)
+  ; GCN-NEXT:   $sgpr12 = COPY [[COPY17]](s32)
+  ; GCN-NEXT:   $sgpr13 = COPY [[COPY18]](s32)
+  ; GCN-NEXT:   $sgpr14 = COPY [[COPY19]](s32)
+  ; GCN-NEXT:   $vgpr31 = COPY [[COPY20]](s32)
+  ; GCN-NEXT:   SI_TCRETURN [[GV]](p0), @v2i64_fastcc_v2i64, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 entry:
   %ret = tail call fastcc <2 x i64> @v2i64_fastcc_v2i64(<2 x i64> %a)
   ret <2 x i64> %ret

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/extend_args.ll b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/extend_args.ll
index f97306ab0b13f..4823ebef14625 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/extend_args.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/extend_args.ll
@@ -5,13 +5,14 @@
 define  signext i8 @sext_arg_i8(i8 signext %a) {
   ; MIPS32-LABEL: name: sext_arg_i8
   ; MIPS32: bb.1.entry:
-  ; MIPS32:   liveins: $a0
-  ; MIPS32:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-  ; MIPS32:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 8
-  ; MIPS32:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
-  ; MIPS32:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8)
-  ; MIPS32:   $v0 = COPY [[SEXT]](s32)
-  ; MIPS32:   RetRA implicit $v0
+  ; MIPS32-NEXT:   liveins: $a0
+  ; MIPS32-NEXT: {{  $}}
+  ; MIPS32-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; MIPS32-NEXT:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 8
+  ; MIPS32-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
+  ; MIPS32-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8)
+  ; MIPS32-NEXT:   $v0 = COPY [[SEXT]](s32)
+  ; MIPS32-NEXT:   RetRA implicit $v0
 entry:
   ret i8 %a
 }
@@ -19,13 +20,14 @@ entry:
 define zeroext i8 @zext_arg_i8(i8 zeroext %a) {
   ; MIPS32-LABEL: name: zext_arg_i8
   ; MIPS32: bb.1.entry:
-  ; MIPS32:   liveins: $a0
-  ; MIPS32:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-  ; MIPS32:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 8
-  ; MIPS32:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32)
-  ; MIPS32:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8)
-  ; MIPS32:   $v0 = COPY [[ZEXT]](s32)
-  ; MIPS32:   RetRA implicit $v0
+  ; MIPS32-NEXT:   liveins: $a0
+  ; MIPS32-NEXT: {{  $}}
+  ; MIPS32-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; MIPS32-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 8
+  ; MIPS32-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32)
+  ; MIPS32-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8)
+  ; MIPS32-NEXT:   $v0 = COPY [[ZEXT]](s32)
+  ; MIPS32-NEXT:   RetRA implicit $v0
 entry:
   ret i8 %a
 }
@@ -33,12 +35,13 @@ entry:
 define i8 @aext_arg_i8(i8 %a) {
   ; MIPS32-LABEL: name: aext_arg_i8
   ; MIPS32: bb.1.entry:
-  ; MIPS32:   liveins: $a0
-  ; MIPS32:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-  ; MIPS32:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
-  ; MIPS32:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s8)
-  ; MIPS32:   $v0 = COPY [[ANYEXT]](s32)
-  ; MIPS32:   RetRA implicit $v0
+  ; MIPS32-NEXT:   liveins: $a0
+  ; MIPS32-NEXT: {{  $}}
+  ; MIPS32-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; MIPS32-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+  ; MIPS32-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s8)
+  ; MIPS32-NEXT:   $v0 = COPY [[ANYEXT]](s32)
+  ; MIPS32-NEXT:   RetRA implicit $v0
 entry:
   ret i8 %a
 }
@@ -50,33 +53,34 @@ declare i8 @aext_stack_arg_i8(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i8 %a)
 define signext i8 @call_sext_stack_arg_i8(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i8 signext %a) {
   ; MIPS32-LABEL: name: call_sext_stack_arg_i8
   ; MIPS32: bb.1.entry:
-  ; MIPS32:   liveins: $a0, $a1, $a2, $a3
-  ; MIPS32:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-  ; MIPS32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
-  ; MIPS32:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
-  ; MIPS32:   [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
-  ; MIPS32:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; MIPS32:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8)
-  ; MIPS32:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[LOAD]], 8
-  ; MIPS32:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
-  ; MIPS32:   ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp
-  ; MIPS32:   $a0 = COPY [[COPY]](s32)
-  ; MIPS32:   $a1 = COPY [[COPY1]](s32)
-  ; MIPS32:   $a2 = COPY [[COPY2]](s32)
-  ; MIPS32:   $a3 = COPY [[COPY3]](s32)
-  ; MIPS32:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8)
-  ; MIPS32:   [[COPY4:%[0-9]+]]:_(p0) = COPY $sp
-  ; MIPS32:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; MIPS32:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32)
-  ; MIPS32:   G_STORE [[SEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8)
-  ; MIPS32:   JAL @sext_stack_arg_i8, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0
-  ; MIPS32:   [[COPY5:%[0-9]+]]:_(s32) = COPY $v0
-  ; MIPS32:   [[ASSERT_SEXT1:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY5]], 8
-  ; MIPS32:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT1]](s32)
-  ; MIPS32:   ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp
-  ; MIPS32:   [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s8)
-  ; MIPS32:   $v0 = COPY [[SEXT1]](s32)
-  ; MIPS32:   RetRA implicit $v0
+  ; MIPS32-NEXT:   liveins: $a0, $a1, $a2, $a3
+  ; MIPS32-NEXT: {{  $}}
+  ; MIPS32-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; MIPS32-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+  ; MIPS32-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+  ; MIPS32-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+  ; MIPS32-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; MIPS32-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8)
+  ; MIPS32-NEXT:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[LOAD]], 8
+  ; MIPS32-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
+  ; MIPS32-NEXT:   ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp
+  ; MIPS32-NEXT:   $a0 = COPY [[COPY]](s32)
+  ; MIPS32-NEXT:   $a1 = COPY [[COPY1]](s32)
+  ; MIPS32-NEXT:   $a2 = COPY [[COPY2]](s32)
+  ; MIPS32-NEXT:   $a3 = COPY [[COPY3]](s32)
+  ; MIPS32-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8)
+  ; MIPS32-NEXT:   [[COPY4:%[0-9]+]]:_(p0) = COPY $sp
+  ; MIPS32-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; MIPS32-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32)
+  ; MIPS32-NEXT:   G_STORE [[SEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8)
+  ; MIPS32-NEXT:   JAL @sext_stack_arg_i8, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0
+  ; MIPS32-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $v0
+  ; MIPS32-NEXT:   [[ASSERT_SEXT1:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY5]], 8
+  ; MIPS32-NEXT:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT1]](s32)
+  ; MIPS32-NEXT:   ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp
+  ; MIPS32-NEXT:   [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s8)
+  ; MIPS32-NEXT:   $v0 = COPY [[SEXT1]](s32)
+  ; MIPS32-NEXT:   RetRA implicit $v0
 entry:
   %call = call signext i8 @sext_stack_arg_i8(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i8 signext %a)
   ret i8 %call
@@ -85,33 +89,34 @@ entry:
 define zeroext i8 @call_zext_stack_arg_i8(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i8 zeroext %a) {
   ; MIPS32-LABEL: name: call_zext_stack_arg_i8
   ; MIPS32: bb.1.entry:
-  ; MIPS32:   liveins: $a0, $a1, $a2, $a3
-  ; MIPS32:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-  ; MIPS32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
-  ; MIPS32:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
-  ; MIPS32:   [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
-  ; MIPS32:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; MIPS32:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8)
-  ; MIPS32:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 8
-  ; MIPS32:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32)
-  ; MIPS32:   ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp
-  ; MIPS32:   $a0 = COPY [[COPY]](s32)
-  ; MIPS32:   $a1 = COPY [[COPY1]](s32)
-  ; MIPS32:   $a2 = COPY [[COPY2]](s32)
-  ; MIPS32:   $a3 = COPY [[COPY3]](s32)
-  ; MIPS32:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8)
-  ; MIPS32:   [[COPY4:%[0-9]+]]:_(p0) = COPY $sp
-  ; MIPS32:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; MIPS32:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32)
-  ; MIPS32:   G_STORE [[ZEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8)
-  ; MIPS32:   JAL @zext_stack_arg_i8, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0
-  ; MIPS32:   [[COPY5:%[0-9]+]]:_(s32) = COPY $v0
-  ; MIPS32:   [[ASSERT_ZEXT1:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY5]], 8
-  ; MIPS32:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT1]](s32)
-  ; MIPS32:   ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp
-  ; MIPS32:   [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC1]](s8)
-  ; MIPS32:   $v0 = COPY [[ZEXT1]](s32)
-  ; MIPS32:   RetRA implicit $v0
+  ; MIPS32-NEXT:   liveins: $a0, $a1, $a2, $a3
+  ; MIPS32-NEXT: {{  $}}
+  ; MIPS32-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; MIPS32-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+  ; MIPS32-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+  ; MIPS32-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+  ; MIPS32-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; MIPS32-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8)
+  ; MIPS32-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 8
+  ; MIPS32-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32)
+  ; MIPS32-NEXT:   ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp
+  ; MIPS32-NEXT:   $a0 = COPY [[COPY]](s32)
+  ; MIPS32-NEXT:   $a1 = COPY [[COPY1]](s32)
+  ; MIPS32-NEXT:   $a2 = COPY [[COPY2]](s32)
+  ; MIPS32-NEXT:   $a3 = COPY [[COPY3]](s32)
+  ; MIPS32-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8)
+  ; MIPS32-NEXT:   [[COPY4:%[0-9]+]]:_(p0) = COPY $sp
+  ; MIPS32-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; MIPS32-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32)
+  ; MIPS32-NEXT:   G_STORE [[ZEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8)
+  ; MIPS32-NEXT:   JAL @zext_stack_arg_i8, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0
+  ; MIPS32-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $v0
+  ; MIPS32-NEXT:   [[ASSERT_ZEXT1:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY5]], 8
+  ; MIPS32-NEXT:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT1]](s32)
+  ; MIPS32-NEXT:   ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp
+  ; MIPS32-NEXT:   [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC1]](s8)
+  ; MIPS32-NEXT:   $v0 = COPY [[ZEXT1]](s32)
+  ; MIPS32-NEXT:   RetRA implicit $v0
 entry:
   %call = call zeroext i8 @zext_stack_arg_i8(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i8 zeroext %a)
   ret i8 %call
@@ -120,31 +125,32 @@ entry:
 define i8 @call_aext_stack_arg_i8(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i8 %a) {
   ; MIPS32-LABEL: name: call_aext_stack_arg_i8
   ; MIPS32: bb.1.entry:
-  ; MIPS32:   liveins: $a0, $a1, $a2, $a3
-  ; MIPS32:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-  ; MIPS32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
-  ; MIPS32:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
-  ; MIPS32:   [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
-  ; MIPS32:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; MIPS32:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8)
-  ; MIPS32:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32)
-  ; MIPS32:   ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp
-  ; MIPS32:   $a0 = COPY [[COPY]](s32)
-  ; MIPS32:   $a1 = COPY [[COPY1]](s32)
-  ; MIPS32:   $a2 = COPY [[COPY2]](s32)
-  ; MIPS32:   $a3 = COPY [[COPY3]](s32)
-  ; MIPS32:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s8)
-  ; MIPS32:   [[COPY4:%[0-9]+]]:_(p0) = COPY $sp
-  ; MIPS32:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; MIPS32:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32)
-  ; MIPS32:   G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8)
-  ; MIPS32:   JAL @aext_stack_arg_i8, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0
-  ; MIPS32:   [[COPY5:%[0-9]+]]:_(s32) = COPY $v0
-  ; MIPS32:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY5]](s32)
-  ; MIPS32:   ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp
-  ; MIPS32:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s8)
-  ; MIPS32:   $v0 = COPY [[ANYEXT1]](s32)
-  ; MIPS32:   RetRA implicit $v0
+  ; MIPS32-NEXT:   liveins: $a0, $a1, $a2, $a3
+  ; MIPS32-NEXT: {{  $}}
+  ; MIPS32-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; MIPS32-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+  ; MIPS32-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+  ; MIPS32-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+  ; MIPS32-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; MIPS32-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8)
+  ; MIPS32-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32)
+  ; MIPS32-NEXT:   ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp
+  ; MIPS32-NEXT:   $a0 = COPY [[COPY]](s32)
+  ; MIPS32-NEXT:   $a1 = COPY [[COPY1]](s32)
+  ; MIPS32-NEXT:   $a2 = COPY [[COPY2]](s32)
+  ; MIPS32-NEXT:   $a3 = COPY [[COPY3]](s32)
+  ; MIPS32-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s8)
+  ; MIPS32-NEXT:   [[COPY4:%[0-9]+]]:_(p0) = COPY $sp
+  ; MIPS32-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; MIPS32-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32)
+  ; MIPS32-NEXT:   G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8)
+  ; MIPS32-NEXT:   JAL @aext_stack_arg_i8, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0
+  ; MIPS32-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $v0
+  ; MIPS32-NEXT:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY5]](s32)
+  ; MIPS32-NEXT:   ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp
+  ; MIPS32-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s8)
+  ; MIPS32-NEXT:   $v0 = COPY [[ANYEXT1]](s32)
+  ; MIPS32-NEXT:   RetRA implicit $v0
 entry:
   %call = call i8 @aext_stack_arg_i8(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i8 %a)
   ret i8 %call
@@ -154,13 +160,14 @@ entry:
 define  signext i16 @sext_arg_i16(i16 signext %a) {
   ; MIPS32-LABEL: name: sext_arg_i16
   ; MIPS32: bb.1.entry:
-  ; MIPS32:   liveins: $a0
-  ; MIPS32:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-  ; MIPS32:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16
-  ; MIPS32:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
-  ; MIPS32:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
-  ; MIPS32:   $v0 = COPY [[SEXT]](s32)
-  ; MIPS32:   RetRA implicit $v0
+  ; MIPS32-NEXT:   liveins: $a0
+  ; MIPS32-NEXT: {{  $}}
+  ; MIPS32-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; MIPS32-NEXT:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16
+  ; MIPS32-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
+  ; MIPS32-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
+  ; MIPS32-NEXT:   $v0 = COPY [[SEXT]](s32)
+  ; MIPS32-NEXT:   RetRA implicit $v0
 entry:
   ret i16 %a
 }
@@ -168,13 +175,14 @@ entry:
 define zeroext i16 @zext_arg_i16(i16 zeroext %a) {
   ; MIPS32-LABEL: name: zext_arg_i16
   ; MIPS32: bb.1.entry:
-  ; MIPS32:   liveins: $a0
-  ; MIPS32:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-  ; MIPS32:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16
-  ; MIPS32:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
-  ; MIPS32:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
-  ; MIPS32:   $v0 = COPY [[ZEXT]](s32)
-  ; MIPS32:   RetRA implicit $v0
+  ; MIPS32-NEXT:   liveins: $a0
+  ; MIPS32-NEXT: {{  $}}
+  ; MIPS32-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; MIPS32-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16
+  ; MIPS32-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
+  ; MIPS32-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
+  ; MIPS32-NEXT:   $v0 = COPY [[ZEXT]](s32)
+  ; MIPS32-NEXT:   RetRA implicit $v0
 entry:
   ret i16 %a
 }
@@ -182,12 +190,13 @@ entry:
 define i16 @aext_arg_i16(i16 %a) {
   ; MIPS32-LABEL: name: aext_arg_i16
   ; MIPS32: bb.1.entry:
-  ; MIPS32:   liveins: $a0
-  ; MIPS32:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-  ; MIPS32:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; MIPS32:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
-  ; MIPS32:   $v0 = COPY [[ANYEXT]](s32)
-  ; MIPS32:   RetRA implicit $v0
+  ; MIPS32-NEXT:   liveins: $a0
+  ; MIPS32-NEXT: {{  $}}
+  ; MIPS32-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; MIPS32-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; MIPS32-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
+  ; MIPS32-NEXT:   $v0 = COPY [[ANYEXT]](s32)
+  ; MIPS32-NEXT:   RetRA implicit $v0
 entry:
   ret i16 %a
 }
@@ -199,33 +208,34 @@ declare i16 @aext_stack_arg_i16(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i16 %a)
 define signext i16 @call_sext_stack_arg_i16(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i16 signext %a) {
   ; MIPS32-LABEL: name: call_sext_stack_arg_i16
   ; MIPS32: bb.1.entry:
-  ; MIPS32:   liveins: $a0, $a1, $a2, $a3
-  ; MIPS32:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-  ; MIPS32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
-  ; MIPS32:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
-  ; MIPS32:   [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
-  ; MIPS32:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; MIPS32:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8)
-  ; MIPS32:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[LOAD]], 16
-  ; MIPS32:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
-  ; MIPS32:   ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp
-  ; MIPS32:   $a0 = COPY [[COPY]](s32)
-  ; MIPS32:   $a1 = COPY [[COPY1]](s32)
-  ; MIPS32:   $a2 = COPY [[COPY2]](s32)
-  ; MIPS32:   $a3 = COPY [[COPY3]](s32)
-  ; MIPS32:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
-  ; MIPS32:   [[COPY4:%[0-9]+]]:_(p0) = COPY $sp
-  ; MIPS32:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; MIPS32:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32)
-  ; MIPS32:   G_STORE [[SEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8)
-  ; MIPS32:   JAL @sext_stack_arg_i16, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0
-  ; MIPS32:   [[COPY5:%[0-9]+]]:_(s32) = COPY $v0
-  ; MIPS32:   [[ASSERT_SEXT1:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY5]], 16
-  ; MIPS32:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT1]](s32)
-  ; MIPS32:   ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp
-  ; MIPS32:   [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16)
-  ; MIPS32:   $v0 = COPY [[SEXT1]](s32)
-  ; MIPS32:   RetRA implicit $v0
+  ; MIPS32-NEXT:   liveins: $a0, $a1, $a2, $a3
+  ; MIPS32-NEXT: {{  $}}
+  ; MIPS32-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; MIPS32-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+  ; MIPS32-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+  ; MIPS32-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+  ; MIPS32-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; MIPS32-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8)
+  ; MIPS32-NEXT:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[LOAD]], 16
+  ; MIPS32-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
+  ; MIPS32-NEXT:   ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp
+  ; MIPS32-NEXT:   $a0 = COPY [[COPY]](s32)
+  ; MIPS32-NEXT:   $a1 = COPY [[COPY1]](s32)
+  ; MIPS32-NEXT:   $a2 = COPY [[COPY2]](s32)
+  ; MIPS32-NEXT:   $a3 = COPY [[COPY3]](s32)
+  ; MIPS32-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
+  ; MIPS32-NEXT:   [[COPY4:%[0-9]+]]:_(p0) = COPY $sp
+  ; MIPS32-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; MIPS32-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32)
+  ; MIPS32-NEXT:   G_STORE [[SEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8)
+  ; MIPS32-NEXT:   JAL @sext_stack_arg_i16, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0
+  ; MIPS32-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $v0
+  ; MIPS32-NEXT:   [[ASSERT_SEXT1:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY5]], 16
+  ; MIPS32-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT1]](s32)
+  ; MIPS32-NEXT:   ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp
+  ; MIPS32-NEXT:   [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16)
+  ; MIPS32-NEXT:   $v0 = COPY [[SEXT1]](s32)
+  ; MIPS32-NEXT:   RetRA implicit $v0
 entry:
   %call = call signext i16 @sext_stack_arg_i16(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i16 signext %a)
   ret i16 %call
@@ -234,33 +244,34 @@ entry:
 define zeroext i16 @call_zext_stack_arg_i16(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i16 zeroext %a) {
   ; MIPS32-LABEL: name: call_zext_stack_arg_i16
   ; MIPS32: bb.1.entry:
-  ; MIPS32:   liveins: $a0, $a1, $a2, $a3
-  ; MIPS32:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-  ; MIPS32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
-  ; MIPS32:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
-  ; MIPS32:   [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
-  ; MIPS32:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; MIPS32:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8)
-  ; MIPS32:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 16
-  ; MIPS32:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
-  ; MIPS32:   ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp
-  ; MIPS32:   $a0 = COPY [[COPY]](s32)
-  ; MIPS32:   $a1 = COPY [[COPY1]](s32)
-  ; MIPS32:   $a2 = COPY [[COPY2]](s32)
-  ; MIPS32:   $a3 = COPY [[COPY3]](s32)
-  ; MIPS32:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
-  ; MIPS32:   [[COPY4:%[0-9]+]]:_(p0) = COPY $sp
-  ; MIPS32:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; MIPS32:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32)
-  ; MIPS32:   G_STORE [[ZEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8)
-  ; MIPS32:   JAL @zext_stack_arg_i16, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0
-  ; MIPS32:   [[COPY5:%[0-9]+]]:_(s32) = COPY $v0
-  ; MIPS32:   [[ASSERT_ZEXT1:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY5]], 16
-  ; MIPS32:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT1]](s32)
-  ; MIPS32:   ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp
-  ; MIPS32:   [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC1]](s16)
-  ; MIPS32:   $v0 = COPY [[ZEXT1]](s32)
-  ; MIPS32:   RetRA implicit $v0
+  ; MIPS32-NEXT:   liveins: $a0, $a1, $a2, $a3
+  ; MIPS32-NEXT: {{  $}}
+  ; MIPS32-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; MIPS32-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+  ; MIPS32-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+  ; MIPS32-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+  ; MIPS32-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; MIPS32-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8)
+  ; MIPS32-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 16
+  ; MIPS32-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
+  ; MIPS32-NEXT:   ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp
+  ; MIPS32-NEXT:   $a0 = COPY [[COPY]](s32)
+  ; MIPS32-NEXT:   $a1 = COPY [[COPY1]](s32)
+  ; MIPS32-NEXT:   $a2 = COPY [[COPY2]](s32)
+  ; MIPS32-NEXT:   $a3 = COPY [[COPY3]](s32)
+  ; MIPS32-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
+  ; MIPS32-NEXT:   [[COPY4:%[0-9]+]]:_(p0) = COPY $sp
+  ; MIPS32-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; MIPS32-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32)
+  ; MIPS32-NEXT:   G_STORE [[ZEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8)
+  ; MIPS32-NEXT:   JAL @zext_stack_arg_i16, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0
+  ; MIPS32-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $v0
+  ; MIPS32-NEXT:   [[ASSERT_ZEXT1:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY5]], 16
+  ; MIPS32-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT1]](s32)
+  ; MIPS32-NEXT:   ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp
+  ; MIPS32-NEXT:   [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC1]](s16)
+  ; MIPS32-NEXT:   $v0 = COPY [[ZEXT1]](s32)
+  ; MIPS32-NEXT:   RetRA implicit $v0
 entry:
   %call = call zeroext i16 @zext_stack_arg_i16(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i16 zeroext %a)
   ret i16 %call
@@ -269,31 +280,32 @@ entry:
 define i16 @call_aext_stack_arg_i16(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i16 %a) {
   ; MIPS32-LABEL: name: call_aext_stack_arg_i16
   ; MIPS32: bb.1.entry:
-  ; MIPS32:   liveins: $a0, $a1, $a2, $a3
-  ; MIPS32:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-  ; MIPS32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
-  ; MIPS32:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
-  ; MIPS32:   [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
-  ; MIPS32:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; MIPS32:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8)
-  ; MIPS32:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-  ; MIPS32:   ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp
-  ; MIPS32:   $a0 = COPY [[COPY]](s32)
-  ; MIPS32:   $a1 = COPY [[COPY1]](s32)
-  ; MIPS32:   $a2 = COPY [[COPY2]](s32)
-  ; MIPS32:   $a3 = COPY [[COPY3]](s32)
-  ; MIPS32:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
-  ; MIPS32:   [[COPY4:%[0-9]+]]:_(p0) = COPY $sp
-  ; MIPS32:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; MIPS32:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32)
-  ; MIPS32:   G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8)
-  ; MIPS32:   JAL @aext_stack_arg_i16, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0
-  ; MIPS32:   [[COPY5:%[0-9]+]]:_(s32) = COPY $v0
-  ; MIPS32:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32)
-  ; MIPS32:   ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp
-  ; MIPS32:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s16)
-  ; MIPS32:   $v0 = COPY [[ANYEXT1]](s32)
-  ; MIPS32:   RetRA implicit $v0
+  ; MIPS32-NEXT:   liveins: $a0, $a1, $a2, $a3
+  ; MIPS32-NEXT: {{  $}}
+  ; MIPS32-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; MIPS32-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+  ; MIPS32-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+  ; MIPS32-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+  ; MIPS32-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; MIPS32-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8)
+  ; MIPS32-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+  ; MIPS32-NEXT:   ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp
+  ; MIPS32-NEXT:   $a0 = COPY [[COPY]](s32)
+  ; MIPS32-NEXT:   $a1 = COPY [[COPY1]](s32)
+  ; MIPS32-NEXT:   $a2 = COPY [[COPY2]](s32)
+  ; MIPS32-NEXT:   $a3 = COPY [[COPY3]](s32)
+  ; MIPS32-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
+  ; MIPS32-NEXT:   [[COPY4:%[0-9]+]]:_(p0) = COPY $sp
+  ; MIPS32-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; MIPS32-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32)
+  ; MIPS32-NEXT:   G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8)
+  ; MIPS32-NEXT:   JAL @aext_stack_arg_i16, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0
+  ; MIPS32-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $v0
+  ; MIPS32-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32)
+  ; MIPS32-NEXT:   ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp
+  ; MIPS32-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s16)
+  ; MIPS32-NEXT:   $v0 = COPY [[ANYEXT1]](s32)
+  ; MIPS32-NEXT:   RetRA implicit $v0
 entry:
   %call = call i16 @aext_stack_arg_i16(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i16 %a)
   ret i16 %call

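For readers skimming the diff: with a plain "; MIPS32:" directive, FileCheck may skip any number of output lines between two matches, so an unexpected instruction emitted between them would go unnoticed. The regenerated "; MIPS32-NEXT:" lines require each match to land on the very next line of the printed MIR, and the "{{  $}}" pattern pins down the two-space separator line the MIR printer emits after the liveins: list. A minimal sketch of the difference, reusing check lines from this patch (not a complete test):

  ; Loose: other lines may appear between the two matches without failing.
  ; MIPS32:      $v0 = COPY [[ZEXT1]](s32)
  ; MIPS32:      RetRA implicit $v0

  ; Strict: RetRA must be on the line immediately after the COPY.
  ; MIPS32:      $v0 = COPY [[ZEXT1]](s32)
  ; MIPS32-NEXT: RetRA implicit $v0
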
diff  --git a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/float_args.ll b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/float_args.ll
index 668057dc8e374..0dc649b3648b5 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/float_args.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/float_args.ll
@@ -6,18 +6,20 @@
 define float @float_in_fpr(float %a, float %b) {
   ; FP32-LABEL: name: float_in_fpr
   ; FP32: bb.1.entry:
-  ; FP32:   liveins: $f12, $f14
-  ; FP32:   [[COPY:%[0-9]+]]:_(s32) = COPY $f12
-  ; FP32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $f14
-  ; FP32:   $f0 = COPY [[COPY1]](s32)
-  ; FP32:   RetRA implicit $f0
+  ; FP32-NEXT:   liveins: $f12, $f14
+  ; FP32-NEXT: {{  $}}
+  ; FP32-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+  ; FP32-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $f14
+  ; FP32-NEXT:   $f0 = COPY [[COPY1]](s32)
+  ; FP32-NEXT:   RetRA implicit $f0
   ; FP64-LABEL: name: float_in_fpr
   ; FP64: bb.1.entry:
-  ; FP64:   liveins: $f12, $f14
-  ; FP64:   [[COPY:%[0-9]+]]:_(s32) = COPY $f12
-  ; FP64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $f14
-  ; FP64:   $f0 = COPY [[COPY1]](s32)
-  ; FP64:   RetRA implicit $f0
+  ; FP64-NEXT:   liveins: $f12, $f14
+  ; FP64-NEXT: {{  $}}
+  ; FP64-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+  ; FP64-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $f14
+  ; FP64-NEXT:   $f0 = COPY [[COPY1]](s32)
+  ; FP64-NEXT:   RetRA implicit $f0
 entry:
   ret float %b
 }
@@ -25,18 +27,20 @@ entry:
 define double @double_in_fpr(double %a, double %b) {
   ; FP32-LABEL: name: double_in_fpr
   ; FP32: bb.1.entry:
-  ; FP32:   liveins: $d6, $d7
-  ; FP32:   [[COPY:%[0-9]+]]:_(s64) = COPY $d6
-  ; FP32:   [[COPY1:%[0-9]+]]:_(s64) = COPY $d7
-  ; FP32:   $d0 = COPY [[COPY1]](s64)
-  ; FP32:   RetRA implicit $d0
+  ; FP32-NEXT:   liveins: $d6, $d7
+  ; FP32-NEXT: {{  $}}
+  ; FP32-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $d6
+  ; FP32-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $d7
+  ; FP32-NEXT:   $d0 = COPY [[COPY1]](s64)
+  ; FP32-NEXT:   RetRA implicit $d0
   ; FP64-LABEL: name: double_in_fpr
   ; FP64: bb.1.entry:
-  ; FP64:   liveins: $d12_64, $d14_64
-  ; FP64:   [[COPY:%[0-9]+]]:_(s64) = COPY $d12_64
-  ; FP64:   [[COPY1:%[0-9]+]]:_(s64) = COPY $d14_64
-  ; FP64:   $d0_64 = COPY [[COPY1]](s64)
-  ; FP64:   RetRA implicit $d0_64
+  ; FP64-NEXT:   liveins: $d12_64, $d14_64
+  ; FP64-NEXT: {{  $}}
+  ; FP64-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $d12_64
+  ; FP64-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $d14_64
+  ; FP64-NEXT:   $d0_64 = COPY [[COPY1]](s64)
+  ; FP64-NEXT:   RetRA implicit $d0_64
 entry:
   ret double %b
 }
@@ -44,18 +48,20 @@ entry:
 define float @float_in_gpr(i32 %a, float %b) {
   ; FP32-LABEL: name: float_in_gpr
   ; FP32: bb.1.entry:
-  ; FP32:   liveins: $a0, $a1
-  ; FP32:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-  ; FP32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
-  ; FP32:   $f0 = COPY [[COPY1]](s32)
-  ; FP32:   RetRA implicit $f0
+  ; FP32-NEXT:   liveins: $a0, $a1
+  ; FP32-NEXT: {{  $}}
+  ; FP32-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; FP32-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+  ; FP32-NEXT:   $f0 = COPY [[COPY1]](s32)
+  ; FP32-NEXT:   RetRA implicit $f0
   ; FP64-LABEL: name: float_in_gpr
   ; FP64: bb.1.entry:
-  ; FP64:   liveins: $a0, $a1
-  ; FP64:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-  ; FP64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
-  ; FP64:   $f0 = COPY [[COPY1]](s32)
-  ; FP64:   RetRA implicit $f0
+  ; FP64-NEXT:   liveins: $a0, $a1
+  ; FP64-NEXT: {{  $}}
+  ; FP64-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; FP64-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+  ; FP64-NEXT:   $f0 = COPY [[COPY1]](s32)
+  ; FP64-NEXT:   RetRA implicit $f0
 entry:
   ret float %b
 }
@@ -63,22 +69,24 @@ entry:
 define double @double_in_gpr(i32 %a, double %b) {
   ; FP32-LABEL: name: double_in_gpr
   ; FP32: bb.1.entry:
-  ; FP32:   liveins: $a0, $a2, $a3
-  ; FP32:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-  ; FP32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a2
-  ; FP32:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a3
-  ; FP32:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
-  ; FP32:   $d0 = COPY [[MV]](s64)
-  ; FP32:   RetRA implicit $d0
+  ; FP32-NEXT:   liveins: $a0, $a2, $a3
+  ; FP32-NEXT: {{  $}}
+  ; FP32-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; FP32-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a2
+  ; FP32-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a3
+  ; FP32-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+  ; FP32-NEXT:   $d0 = COPY [[MV]](s64)
+  ; FP32-NEXT:   RetRA implicit $d0
   ; FP64-LABEL: name: double_in_gpr
   ; FP64: bb.1.entry:
-  ; FP64:   liveins: $a0, $a2, $a3
-  ; FP64:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-  ; FP64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a2
-  ; FP64:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a3
-  ; FP64:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
-  ; FP64:   $d0_64 = COPY [[MV]](s64)
-  ; FP64:   RetRA implicit $d0_64
+  ; FP64-NEXT:   liveins: $a0, $a2, $a3
+  ; FP64-NEXT: {{  $}}
+  ; FP64-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; FP64-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a2
+  ; FP64-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a3
+  ; FP64-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+  ; FP64-NEXT:   $d0_64 = COPY [[MV]](s64)
+  ; FP64-NEXT:   RetRA implicit $d0_64
 entry:
   ret double %b
 }
@@ -86,30 +94,32 @@ entry:
 define float @call_float_in_fpr(float %a, float %b) {
   ; FP32-LABEL: name: call_float_in_fpr
   ; FP32: bb.1.entry:
-  ; FP32:   liveins: $f12, $f14
-  ; FP32:   [[COPY:%[0-9]+]]:_(s32) = COPY $f12
-  ; FP32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $f14
-  ; FP32:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
-  ; FP32:   $f12 = COPY [[COPY]](s32)
-  ; FP32:   $f14 = COPY [[COPY1]](s32)
-  ; FP32:   JAL @float_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit $f14, implicit-def $f0
-  ; FP32:   [[COPY2:%[0-9]+]]:_(s32) = COPY $f0
-  ; FP32:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
-  ; FP32:   $f0 = COPY [[COPY2]](s32)
-  ; FP32:   RetRA implicit $f0
+  ; FP32-NEXT:   liveins: $f12, $f14
+  ; FP32-NEXT: {{  $}}
+  ; FP32-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+  ; FP32-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $f14
+  ; FP32-NEXT:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+  ; FP32-NEXT:   $f12 = COPY [[COPY]](s32)
+  ; FP32-NEXT:   $f14 = COPY [[COPY1]](s32)
+  ; FP32-NEXT:   JAL @float_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12, implicit $f14, implicit-def $f0
+  ; FP32-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $f0
+  ; FP32-NEXT:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+  ; FP32-NEXT:   $f0 = COPY [[COPY2]](s32)
+  ; FP32-NEXT:   RetRA implicit $f0
   ; FP64-LABEL: name: call_float_in_fpr
   ; FP64: bb.1.entry:
-  ; FP64:   liveins: $f12, $f14
-  ; FP64:   [[COPY:%[0-9]+]]:_(s32) = COPY $f12
-  ; FP64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $f14
-  ; FP64:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
-  ; FP64:   $f12 = COPY [[COPY]](s32)
-  ; FP64:   $f14 = COPY [[COPY1]](s32)
-  ; FP64:   JAL @float_in_fpr, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $f12, implicit $f14, implicit-def $f0
-  ; FP64:   [[COPY2:%[0-9]+]]:_(s32) = COPY $f0
-  ; FP64:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
-  ; FP64:   $f0 = COPY [[COPY2]](s32)
-  ; FP64:   RetRA implicit $f0
+  ; FP64-NEXT:   liveins: $f12, $f14
+  ; FP64-NEXT: {{  $}}
+  ; FP64-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+  ; FP64-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $f14
+  ; FP64-NEXT:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+  ; FP64-NEXT:   $f12 = COPY [[COPY]](s32)
+  ; FP64-NEXT:   $f14 = COPY [[COPY1]](s32)
+  ; FP64-NEXT:   JAL @float_in_fpr, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $f12, implicit $f14, implicit-def $f0
+  ; FP64-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $f0
+  ; FP64-NEXT:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+  ; FP64-NEXT:   $f0 = COPY [[COPY2]](s32)
+  ; FP64-NEXT:   RetRA implicit $f0
 entry:
   %call = call float @float_in_fpr(float %a, float %b)
   ret float %call
@@ -118,30 +128,32 @@ entry:
 define double @call_double_in_fpr(double %a, double %b) {
   ; FP32-LABEL: name: call_double_in_fpr
   ; FP32: bb.1.entry:
-  ; FP32:   liveins: $d6, $d7
-  ; FP32:   [[COPY:%[0-9]+]]:_(s64) = COPY $d6
-  ; FP32:   [[COPY1:%[0-9]+]]:_(s64) = COPY $d7
-  ; FP32:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
-  ; FP32:   $d6 = COPY [[COPY]](s64)
-  ; FP32:   $d7 = COPY [[COPY1]](s64)
-  ; FP32:   JAL @double_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit $d7, implicit-def $d0
-  ; FP32:   [[COPY2:%[0-9]+]]:_(s64) = COPY $d0
-  ; FP32:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
-  ; FP32:   $d0 = COPY [[COPY2]](s64)
-  ; FP32:   RetRA implicit $d0
+  ; FP32-NEXT:   liveins: $d6, $d7
+  ; FP32-NEXT: {{  $}}
+  ; FP32-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $d6
+  ; FP32-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $d7
+  ; FP32-NEXT:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+  ; FP32-NEXT:   $d6 = COPY [[COPY]](s64)
+  ; FP32-NEXT:   $d7 = COPY [[COPY1]](s64)
+  ; FP32-NEXT:   JAL @double_in_fpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6, implicit $d7, implicit-def $d0
+  ; FP32-NEXT:   [[COPY2:%[0-9]+]]:_(s64) = COPY $d0
+  ; FP32-NEXT:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+  ; FP32-NEXT:   $d0 = COPY [[COPY2]](s64)
+  ; FP32-NEXT:   RetRA implicit $d0
   ; FP64-LABEL: name: call_double_in_fpr
   ; FP64: bb.1.entry:
-  ; FP64:   liveins: $d12_64, $d14_64
-  ; FP64:   [[COPY:%[0-9]+]]:_(s64) = COPY $d12_64
-  ; FP64:   [[COPY1:%[0-9]+]]:_(s64) = COPY $d14_64
-  ; FP64:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
-  ; FP64:   $d12_64 = COPY [[COPY]](s64)
-  ; FP64:   $d14_64 = COPY [[COPY1]](s64)
-  ; FP64:   JAL @double_in_fpr, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $d12_64, implicit $d14_64, implicit-def $d0_64
-  ; FP64:   [[COPY2:%[0-9]+]]:_(s64) = COPY $d0_64
-  ; FP64:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
-  ; FP64:   $d0_64 = COPY [[COPY2]](s64)
-  ; FP64:   RetRA implicit $d0_64
+  ; FP64-NEXT:   liveins: $d12_64, $d14_64
+  ; FP64-NEXT: {{  $}}
+  ; FP64-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $d12_64
+  ; FP64-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $d14_64
+  ; FP64-NEXT:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+  ; FP64-NEXT:   $d12_64 = COPY [[COPY]](s64)
+  ; FP64-NEXT:   $d14_64 = COPY [[COPY1]](s64)
+  ; FP64-NEXT:   JAL @double_in_fpr, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $d12_64, implicit $d14_64, implicit-def $d0_64
+  ; FP64-NEXT:   [[COPY2:%[0-9]+]]:_(s64) = COPY $d0_64
+  ; FP64-NEXT:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+  ; FP64-NEXT:   $d0_64 = COPY [[COPY2]](s64)
+  ; FP64-NEXT:   RetRA implicit $d0_64
 entry:
   %call = call double @double_in_fpr(double %a, double %b)
   ret double %call
@@ -150,30 +162,32 @@ entry:
 define float @call_float_in_gpr(i32 %a, float %b) {
   ; FP32-LABEL: name: call_float_in_gpr
   ; FP32: bb.1.entry:
-  ; FP32:   liveins: $a0, $a1
-  ; FP32:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-  ; FP32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
-  ; FP32:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
-  ; FP32:   $a0 = COPY [[COPY]](s32)
-  ; FP32:   $a1 = COPY [[COPY1]](s32)
-  ; FP32:   JAL @float_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $f0
-  ; FP32:   [[COPY2:%[0-9]+]]:_(s32) = COPY $f0
-  ; FP32:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
-  ; FP32:   $f0 = COPY [[COPY2]](s32)
-  ; FP32:   RetRA implicit $f0
+  ; FP32-NEXT:   liveins: $a0, $a1
+  ; FP32-NEXT: {{  $}}
+  ; FP32-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; FP32-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+  ; FP32-NEXT:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+  ; FP32-NEXT:   $a0 = COPY [[COPY]](s32)
+  ; FP32-NEXT:   $a1 = COPY [[COPY1]](s32)
+  ; FP32-NEXT:   JAL @float_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $f0
+  ; FP32-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $f0
+  ; FP32-NEXT:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+  ; FP32-NEXT:   $f0 = COPY [[COPY2]](s32)
+  ; FP32-NEXT:   RetRA implicit $f0
   ; FP64-LABEL: name: call_float_in_gpr
   ; FP64: bb.1.entry:
-  ; FP64:   liveins: $a0, $a1
-  ; FP64:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-  ; FP64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
-  ; FP64:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
-  ; FP64:   $a0 = COPY [[COPY]](s32)
-  ; FP64:   $a1 = COPY [[COPY1]](s32)
-  ; FP64:   JAL @float_in_gpr, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $f0
-  ; FP64:   [[COPY2:%[0-9]+]]:_(s32) = COPY $f0
-  ; FP64:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
-  ; FP64:   $f0 = COPY [[COPY2]](s32)
-  ; FP64:   RetRA implicit $f0
+  ; FP64-NEXT:   liveins: $a0, $a1
+  ; FP64-NEXT: {{  $}}
+  ; FP64-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; FP64-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+  ; FP64-NEXT:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+  ; FP64-NEXT:   $a0 = COPY [[COPY]](s32)
+  ; FP64-NEXT:   $a1 = COPY [[COPY1]](s32)
+  ; FP64-NEXT:   JAL @float_in_gpr, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $f0
+  ; FP64-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $f0
+  ; FP64-NEXT:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+  ; FP64-NEXT:   $f0 = COPY [[COPY2]](s32)
+  ; FP64-NEXT:   RetRA implicit $f0
 entry:
   %call = call float @float_in_gpr(i32 %a, float %b)
   ret float %call
@@ -183,38 +197,40 @@ entry:
 define double @call_double_in_gpr(i32 %a, double %b) {
   ; FP32-LABEL: name: call_double_in_gpr
   ; FP32: bb.1.entry:
-  ; FP32:   liveins: $a0, $a2, $a3
-  ; FP32:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-  ; FP32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a2
-  ; FP32:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a3
-  ; FP32:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
-  ; FP32:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
-  ; FP32:   $a0 = COPY [[COPY]](s32)
-  ; FP32:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
-  ; FP32:   $a2 = COPY [[UV]](s32)
-  ; FP32:   $a3 = COPY [[UV1]](s32)
-  ; FP32:   JAL @double_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0
-  ; FP32:   [[COPY3:%[0-9]+]]:_(s64) = COPY $d0
-  ; FP32:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
-  ; FP32:   $d0 = COPY [[COPY3]](s64)
-  ; FP32:   RetRA implicit $d0
+  ; FP32-NEXT:   liveins: $a0, $a2, $a3
+  ; FP32-NEXT: {{  $}}
+  ; FP32-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; FP32-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a2
+  ; FP32-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a3
+  ; FP32-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+  ; FP32-NEXT:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+  ; FP32-NEXT:   $a0 = COPY [[COPY]](s32)
+  ; FP32-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
+  ; FP32-NEXT:   $a2 = COPY [[UV]](s32)
+  ; FP32-NEXT:   $a3 = COPY [[UV1]](s32)
+  ; FP32-NEXT:   JAL @double_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0
+  ; FP32-NEXT:   [[COPY3:%[0-9]+]]:_(s64) = COPY $d0
+  ; FP32-NEXT:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+  ; FP32-NEXT:   $d0 = COPY [[COPY3]](s64)
+  ; FP32-NEXT:   RetRA implicit $d0
   ; FP64-LABEL: name: call_double_in_gpr
   ; FP64: bb.1.entry:
-  ; FP64:   liveins: $a0, $a2, $a3
-  ; FP64:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-  ; FP64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a2
-  ; FP64:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a3
-  ; FP64:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
-  ; FP64:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
-  ; FP64:   $a0 = COPY [[COPY]](s32)
-  ; FP64:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
-  ; FP64:   $a2 = COPY [[UV]](s32)
-  ; FP64:   $a3 = COPY [[UV1]](s32)
-  ; FP64:   JAL @double_in_gpr, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0_64
-  ; FP64:   [[COPY3:%[0-9]+]]:_(s64) = COPY $d0_64
-  ; FP64:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
-  ; FP64:   $d0_64 = COPY [[COPY3]](s64)
-  ; FP64:   RetRA implicit $d0_64
+  ; FP64-NEXT:   liveins: $a0, $a2, $a3
+  ; FP64-NEXT: {{  $}}
+  ; FP64-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; FP64-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a2
+  ; FP64-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a3
+  ; FP64-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+  ; FP64-NEXT:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+  ; FP64-NEXT:   $a0 = COPY [[COPY]](s32)
+  ; FP64-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
+  ; FP64-NEXT:   $a2 = COPY [[UV]](s32)
+  ; FP64-NEXT:   $a3 = COPY [[UV1]](s32)
+  ; FP64-NEXT:   JAL @double_in_gpr, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0_64
+  ; FP64-NEXT:   [[COPY3:%[0-9]+]]:_(s64) = COPY $d0_64
+  ; FP64-NEXT:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+  ; FP64-NEXT:   $d0_64 = COPY [[COPY3]](s64)
+  ; FP64-NEXT:   RetRA implicit $d0_64
 entry:
   %call = call double @double_in_gpr(i32 %a, double %b)
   ret double %call

diff  --git a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/stack_args.ll b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/stack_args.ll
index 9375be547fae6..b823f66a64cd9 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/stack_args.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/stack_args.ll
@@ -6,27 +6,28 @@ declare i32 @f(i32, i32, i32, i32, i32)
 define  i32 @g(i32  %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5){
   ; MIPS32-LABEL: name: g
   ; MIPS32: bb.1.entry:
-  ; MIPS32:   liveins: $a0, $a1, $a2, $a3
-  ; MIPS32:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-  ; MIPS32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
-  ; MIPS32:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
-  ; MIPS32:   [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
-  ; MIPS32:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; MIPS32:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8)
-  ; MIPS32:   ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp
-  ; MIPS32:   $a0 = COPY [[COPY]](s32)
-  ; MIPS32:   $a1 = COPY [[COPY1]](s32)
-  ; MIPS32:   $a2 = COPY [[COPY2]](s32)
-  ; MIPS32:   $a3 = COPY [[COPY3]](s32)
-  ; MIPS32:   [[COPY4:%[0-9]+]]:_(p0) = COPY $sp
-  ; MIPS32:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; MIPS32:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32)
-  ; MIPS32:   G_STORE [[LOAD]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8)
-  ; MIPS32:   JAL @f, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0
-  ; MIPS32:   [[COPY5:%[0-9]+]]:_(s32) = COPY $v0
-  ; MIPS32:   ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp
-  ; MIPS32:   $v0 = COPY [[COPY5]](s32)
-  ; MIPS32:   RetRA implicit $v0
+  ; MIPS32-NEXT:   liveins: $a0, $a1, $a2, $a3
+  ; MIPS32-NEXT: {{  $}}
+  ; MIPS32-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; MIPS32-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+  ; MIPS32-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+  ; MIPS32-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+  ; MIPS32-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; MIPS32-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8)
+  ; MIPS32-NEXT:   ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp
+  ; MIPS32-NEXT:   $a0 = COPY [[COPY]](s32)
+  ; MIPS32-NEXT:   $a1 = COPY [[COPY1]](s32)
+  ; MIPS32-NEXT:   $a2 = COPY [[COPY2]](s32)
+  ; MIPS32-NEXT:   $a3 = COPY [[COPY3]](s32)
+  ; MIPS32-NEXT:   [[COPY4:%[0-9]+]]:_(p0) = COPY $sp
+  ; MIPS32-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; MIPS32-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32)
+  ; MIPS32-NEXT:   G_STORE [[LOAD]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8)
+  ; MIPS32-NEXT:   JAL @f, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0
+  ; MIPS32-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $v0
+  ; MIPS32-NEXT:   ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp
+  ; MIPS32-NEXT:   $v0 = COPY [[COPY5]](s32)
+  ; MIPS32-NEXT:   RetRA implicit $v0
 entry:
   %call = call i32 @f(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5)
   ret i32 %call

diff  --git a/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll b/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
index f3e4de73e0d95..1ccfec531d026 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
@@ -9,67 +9,68 @@
 define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 %arg6, i8 %arg7, i8 %arg8) {
   ; X86-LABEL: name: test_i8_args_8
   ; X86: bb.1.entry:
-  ; X86:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.7
-  ; X86:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s8) from %fixed-stack.7, align 16)
-  ; X86:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32)
-  ; X86:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.6
-  ; X86:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.6, align 4)
-  ; X86:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32)
-  ; X86:   [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.5
-  ; X86:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load (s8) from %fixed-stack.5, align 8)
-  ; X86:   [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32)
-  ; X86:   [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.4
-  ; X86:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (invariant load (s8) from %fixed-stack.4, align 4)
-  ; X86:   [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32)
-  ; X86:   [[FRAME_INDEX4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3
-  ; X86:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p0) :: (invariant load (s8) from %fixed-stack.3, align 16)
-  ; X86:   [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32)
-  ; X86:   [[FRAME_INDEX5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
-  ; X86:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p0) :: (invariant load (s8) from %fixed-stack.2, align 4)
-  ; X86:   [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32)
-  ; X86:   [[FRAME_INDEX6:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
-  ; X86:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p0) :: (invariant load (s8) from %fixed-stack.1, align 8)
-  ; X86:   [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32)
-  ; X86:   [[FRAME_INDEX7:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; X86:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p0) :: (invariant load (s8) from %fixed-stack.0, align 4)
-  ; X86:   [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32)
-  ; X86:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_8bit
-  ; X86:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_8bit
-  ; X86:   [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_8bit
-  ; X86:   G_STORE [[TRUNC]](s8), [[GV]](p0) :: (store (s8) into @a1_8bit)
-  ; X86:   G_STORE [[TRUNC6]](s8), [[GV1]](p0) :: (store (s8) into @a7_8bit)
-  ; X86:   G_STORE [[TRUNC7]](s8), [[GV2]](p0) :: (store (s8) into @a8_8bit)
-  ; X86:   $al = COPY [[TRUNC]](s8)
-  ; X86:   RET 0, implicit $al
+  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.7
+  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s8) from %fixed-stack.7, align 16)
+  ; X86-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32)
+  ; X86-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.6
+  ; X86-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.6, align 4)
+  ; X86-NEXT:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32)
+  ; X86-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.5
+  ; X86-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load (s8) from %fixed-stack.5, align 8)
+  ; X86-NEXT:   [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32)
+  ; X86-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.4
+  ; X86-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (invariant load (s8) from %fixed-stack.4, align 4)
+  ; X86-NEXT:   [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32)
+  ; X86-NEXT:   [[FRAME_INDEX4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3
+  ; X86-NEXT:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p0) :: (invariant load (s8) from %fixed-stack.3, align 16)
+  ; X86-NEXT:   [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32)
+  ; X86-NEXT:   [[FRAME_INDEX5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
+  ; X86-NEXT:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p0) :: (invariant load (s8) from %fixed-stack.2, align 4)
+  ; X86-NEXT:   [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32)
+  ; X86-NEXT:   [[FRAME_INDEX6:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
+  ; X86-NEXT:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p0) :: (invariant load (s8) from %fixed-stack.1, align 8)
+  ; X86-NEXT:   [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32)
+  ; X86-NEXT:   [[FRAME_INDEX7:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; X86-NEXT:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p0) :: (invariant load (s8) from %fixed-stack.0, align 4)
+  ; X86-NEXT:   [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32)
+  ; X86-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_8bit
+  ; X86-NEXT:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_8bit
+  ; X86-NEXT:   [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_8bit
+  ; X86-NEXT:   G_STORE [[TRUNC]](s8), [[GV]](p0) :: (store (s8) into @a1_8bit)
+  ; X86-NEXT:   G_STORE [[TRUNC6]](s8), [[GV1]](p0) :: (store (s8) into @a7_8bit)
+  ; X86-NEXT:   G_STORE [[TRUNC7]](s8), [[GV2]](p0) :: (store (s8) into @a8_8bit)
+  ; X86-NEXT:   $al = COPY [[TRUNC]](s8)
+  ; X86-NEXT:   RET 0, implicit $al
   ; X64-LABEL: name: test_i8_args_8
   ; X64: bb.1.entry:
-  ; X64:   liveins: $ecx, $edi, $edx, $esi, $r8d, $r9d
-  ; X64:   [[COPY:%[0-9]+]]:_(s32) = COPY $edi
-  ; X64:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
-  ; X64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $esi
-  ; X64:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
-  ; X64:   [[COPY2:%[0-9]+]]:_(s32) = COPY $edx
-  ; X64:   [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
-  ; X64:   [[COPY3:%[0-9]+]]:_(s32) = COPY $ecx
-  ; X64:   [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[COPY3]](s32)
-  ; X64:   [[COPY4:%[0-9]+]]:_(s32) = COPY $r8d
-  ; X64:   [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[COPY4]](s32)
-  ; X64:   [[COPY5:%[0-9]+]]:_(s32) = COPY $r9d
-  ; X64:   [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[COPY5]](s32)
-  ; X64:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
-  ; X64:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s8) from %fixed-stack.1, align 16)
-  ; X64:   [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32)
-  ; X64:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; X64:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.0, align 8)
-  ; X64:   [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32)
-  ; X64:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_8bit
-  ; X64:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_8bit
-  ; X64:   [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_8bit
-  ; X64:   G_STORE [[TRUNC]](s8), [[GV]](p0) :: (store (s8) into @a1_8bit)
-  ; X64:   G_STORE [[TRUNC6]](s8), [[GV1]](p0) :: (store (s8) into @a7_8bit)
-  ; X64:   G_STORE [[TRUNC7]](s8), [[GV2]](p0) :: (store (s8) into @a8_8bit)
-  ; X64:   $al = COPY [[TRUNC]](s8)
-  ; X64:   RET 0, implicit $al
+  ; X64-NEXT:   liveins: $ecx, $edi, $edx, $esi, $r8d, $r9d
+  ; X64-NEXT: {{  $}}
+  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $edi
+  ; X64-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $esi
+  ; X64-NEXT:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+  ; X64-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $edx
+  ; X64-NEXT:   [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
+  ; X64-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $ecx
+  ; X64-NEXT:   [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[COPY3]](s32)
+  ; X64-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $r8d
+  ; X64-NEXT:   [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[COPY4]](s32)
+  ; X64-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $r9d
+  ; X64-NEXT:   [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[COPY5]](s32)
+  ; X64-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
+  ; X64-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s8) from %fixed-stack.1, align 16)
+  ; X64-NEXT:   [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32)
+  ; X64-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; X64-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.0, align 8)
+  ; X64-NEXT:   [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32)
+  ; X64-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_8bit
+  ; X64-NEXT:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_8bit
+  ; X64-NEXT:   [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_8bit
+  ; X64-NEXT:   G_STORE [[TRUNC]](s8), [[GV]](p0) :: (store (s8) into @a1_8bit)
+  ; X64-NEXT:   G_STORE [[TRUNC6]](s8), [[GV1]](p0) :: (store (s8) into @a7_8bit)
+  ; X64-NEXT:   G_STORE [[TRUNC7]](s8), [[GV2]](p0) :: (store (s8) into @a8_8bit)
+  ; X64-NEXT:   $al = COPY [[TRUNC]](s8)
+  ; X64-NEXT:   RET 0, implicit $al
 entry:
   store i8 %arg1, i8* @a1_8bit
   store i8 %arg7, i8* @a7_8bit
@@ -84,51 +85,52 @@ entry:
 define i32 @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8) {
   ; X86-LABEL: name: test_i32_args_8
   ; X86: bb.1.entry:
-  ; X86:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.7
-  ; X86:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.7, align 16)
-  ; X86:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.6
-  ; X86:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.6)
-  ; X86:   [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.5
-  ; X86:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load (s32) from %fixed-stack.5, align 8)
-  ; X86:   [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.4
-  ; X86:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (invariant load (s32) from %fixed-stack.4)
-  ; X86:   [[FRAME_INDEX4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3
-  ; X86:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p0) :: (invariant load (s32) from %fixed-stack.3, align 16)
-  ; X86:   [[FRAME_INDEX5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
-  ; X86:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p0) :: (invariant load (s32) from %fixed-stack.2)
-  ; X86:   [[FRAME_INDEX6:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
-  ; X86:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p0) :: (invariant load (s32) from %fixed-stack.1, align 8)
-  ; X86:   [[FRAME_INDEX7:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; X86:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p0) :: (invariant load (s32) from %fixed-stack.0)
-  ; X86:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_32bit
-  ; X86:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_32bit
-  ; X86:   [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_32bit
-  ; X86:   G_STORE [[LOAD]](s32), [[GV]](p0) :: (store (s32) into @a1_32bit)
-  ; X86:   G_STORE [[LOAD6]](s32), [[GV1]](p0) :: (store (s32) into @a7_32bit)
-  ; X86:   G_STORE [[LOAD7]](s32), [[GV2]](p0) :: (store (s32) into @a8_32bit)
-  ; X86:   $eax = COPY [[LOAD]](s32)
-  ; X86:   RET 0, implicit $eax
+  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.7
+  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.7, align 16)
+  ; X86-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.6
+  ; X86-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.6)
+  ; X86-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.5
+  ; X86-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load (s32) from %fixed-stack.5, align 8)
+  ; X86-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.4
+  ; X86-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (invariant load (s32) from %fixed-stack.4)
+  ; X86-NEXT:   [[FRAME_INDEX4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3
+  ; X86-NEXT:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p0) :: (invariant load (s32) from %fixed-stack.3, align 16)
+  ; X86-NEXT:   [[FRAME_INDEX5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
+  ; X86-NEXT:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p0) :: (invariant load (s32) from %fixed-stack.2)
+  ; X86-NEXT:   [[FRAME_INDEX6:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
+  ; X86-NEXT:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p0) :: (invariant load (s32) from %fixed-stack.1, align 8)
+  ; X86-NEXT:   [[FRAME_INDEX7:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; X86-NEXT:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p0) :: (invariant load (s32) from %fixed-stack.0)
+  ; X86-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_32bit
+  ; X86-NEXT:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_32bit
+  ; X86-NEXT:   [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_32bit
+  ; X86-NEXT:   G_STORE [[LOAD]](s32), [[GV]](p0) :: (store (s32) into @a1_32bit)
+  ; X86-NEXT:   G_STORE [[LOAD6]](s32), [[GV1]](p0) :: (store (s32) into @a7_32bit)
+  ; X86-NEXT:   G_STORE [[LOAD7]](s32), [[GV2]](p0) :: (store (s32) into @a8_32bit)
+  ; X86-NEXT:   $eax = COPY [[LOAD]](s32)
+  ; X86-NEXT:   RET 0, implicit $eax
   ; X64-LABEL: name: test_i32_args_8
   ; X64: bb.1.entry:
-  ; X64:   liveins: $ecx, $edi, $edx, $esi, $r8d, $r9d
-  ; X64:   [[COPY:%[0-9]+]]:_(s32) = COPY $edi
-  ; X64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $esi
-  ; X64:   [[COPY2:%[0-9]+]]:_(s32) = COPY $edx
-  ; X64:   [[COPY3:%[0-9]+]]:_(s32) = COPY $ecx
-  ; X64:   [[COPY4:%[0-9]+]]:_(s32) = COPY $r8d
-  ; X64:   [[COPY5:%[0-9]+]]:_(s32) = COPY $r9d
-  ; X64:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
-  ; X64:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.1, align 16)
-  ; X64:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; X64:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.0, align 8)
-  ; X64:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_32bit
-  ; X64:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_32bit
-  ; X64:   [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_32bit
-  ; X64:   G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @a1_32bit)
-  ; X64:   G_STORE [[LOAD]](s32), [[GV1]](p0) :: (store (s32) into @a7_32bit)
-  ; X64:   G_STORE [[LOAD1]](s32), [[GV2]](p0) :: (store (s32) into @a8_32bit)
-  ; X64:   $eax = COPY [[COPY]](s32)
-  ; X64:   RET 0, implicit $eax
+  ; X64-NEXT:   liveins: $ecx, $edi, $edx, $esi, $r8d, $r9d
+  ; X64-NEXT: {{  $}}
+  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $edi
+  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $esi
+  ; X64-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $edx
+  ; X64-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $ecx
+  ; X64-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $r8d
+  ; X64-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $r9d
+  ; X64-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
+  ; X64-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.1, align 16)
+  ; X64-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; X64-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.0, align 8)
+  ; X64-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_32bit
+  ; X64-NEXT:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_32bit
+  ; X64-NEXT:   [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_32bit
+  ; X64-NEXT:   G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @a1_32bit)
+  ; X64-NEXT:   G_STORE [[LOAD]](s32), [[GV1]](p0) :: (store (s32) into @a7_32bit)
+  ; X64-NEXT:   G_STORE [[LOAD1]](s32), [[GV2]](p0) :: (store (s32) into @a8_32bit)
+  ; X64-NEXT:   $eax = COPY [[COPY]](s32)
+  ; X64-NEXT:   RET 0, implicit $eax
 entry:
   store i32 %arg1, i32* @a1_32bit
   store i32 %arg7, i32* @a7_32bit
@@ -144,77 +146,78 @@ entry:
 define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg5, i64 %arg6, i64 %arg7, i64 %arg8) {
   ; X86-LABEL: name: test_i64_args_8
   ; X86: bb.1.entry:
-  ; X86:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.15
-  ; X86:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.15, align 16)
-  ; X86:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.14
-  ; X86:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.14)
-  ; X86:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-  ; X86:   [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.13
-  ; X86:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load (s32) from %fixed-stack.13, align 8)
-  ; X86:   [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.12
-  ; X86:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (invariant load (s32) from %fixed-stack.12)
-  ; X86:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
-  ; X86:   [[FRAME_INDEX4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.11
-  ; X86:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p0) :: (invariant load (s32) from %fixed-stack.11, align 16)
-  ; X86:   [[FRAME_INDEX5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.10
-  ; X86:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p0) :: (invariant load (s32) from %fixed-stack.10)
-  ; X86:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
-  ; X86:   [[FRAME_INDEX6:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.9
-  ; X86:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p0) :: (invariant load (s32) from %fixed-stack.9, align 8)
-  ; X86:   [[FRAME_INDEX7:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.8
-  ; X86:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p0) :: (invariant load (s32) from %fixed-stack.8)
-  ; X86:   [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
-  ; X86:   [[FRAME_INDEX8:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.7
-  ; X86:   [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p0) :: (invariant load (s32) from %fixed-stack.7, align 16)
-  ; X86:   [[FRAME_INDEX9:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.6
-  ; X86:   [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p0) :: (invariant load (s32) from %fixed-stack.6)
-  ; X86:   [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD8]](s32), [[LOAD9]](s32)
-  ; X86:   [[FRAME_INDEX10:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.5
-  ; X86:   [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p0) :: (invariant load (s32) from %fixed-stack.5, align 8)
-  ; X86:   [[FRAME_INDEX11:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.4
-  ; X86:   [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p0) :: (invariant load (s32) from %fixed-stack.4)
-  ; X86:   [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD10]](s32), [[LOAD11]](s32)
-  ; X86:   [[FRAME_INDEX12:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3
-  ; X86:   [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p0) :: (invariant load (s32) from %fixed-stack.3, align 16)
-  ; X86:   [[FRAME_INDEX13:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
-  ; X86:   [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p0) :: (invariant load (s32) from %fixed-stack.2)
-  ; X86:   [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD12]](s32), [[LOAD13]](s32)
-  ; X86:   [[FRAME_INDEX14:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
-  ; X86:   [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p0) :: (invariant load (s32) from %fixed-stack.1, align 8)
-  ; X86:   [[FRAME_INDEX15:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; X86:   [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p0) :: (invariant load (s32) from %fixed-stack.0)
-  ; X86:   [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD14]](s32), [[LOAD15]](s32)
-  ; X86:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_64bit
-  ; X86:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_64bit
-  ; X86:   [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_64bit
-  ; X86:   G_STORE [[MV]](s64), [[GV]](p0) :: (store (s64) into @a1_64bit, align 4)
-  ; X86:   G_STORE [[MV6]](s64), [[GV1]](p0) :: (store (s64) into @a7_64bit, align 4)
-  ; X86:   G_STORE [[MV7]](s64), [[GV2]](p0) :: (store (s64) into @a8_64bit, align 4)
-  ; X86:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
-  ; X86:   $eax = COPY [[UV]](s32)
-  ; X86:   $edx = COPY [[UV1]](s32)
-  ; X86:   RET 0, implicit $eax, implicit $edx
+  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.15
+  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.15, align 16)
+  ; X86-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.14
+  ; X86-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.14)
+  ; X86-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+  ; X86-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.13
+  ; X86-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load (s32) from %fixed-stack.13, align 8)
+  ; X86-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.12
+  ; X86-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (invariant load (s32) from %fixed-stack.12)
+  ; X86-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
+  ; X86-NEXT:   [[FRAME_INDEX4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.11
+  ; X86-NEXT:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p0) :: (invariant load (s32) from %fixed-stack.11, align 16)
+  ; X86-NEXT:   [[FRAME_INDEX5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.10
+  ; X86-NEXT:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p0) :: (invariant load (s32) from %fixed-stack.10)
+  ; X86-NEXT:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
+  ; X86-NEXT:   [[FRAME_INDEX6:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.9
+  ; X86-NEXT:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p0) :: (invariant load (s32) from %fixed-stack.9, align 8)
+  ; X86-NEXT:   [[FRAME_INDEX7:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.8
+  ; X86-NEXT:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p0) :: (invariant load (s32) from %fixed-stack.8)
+  ; X86-NEXT:   [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
+  ; X86-NEXT:   [[FRAME_INDEX8:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.7
+  ; X86-NEXT:   [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p0) :: (invariant load (s32) from %fixed-stack.7, align 16)
+  ; X86-NEXT:   [[FRAME_INDEX9:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.6
+  ; X86-NEXT:   [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p0) :: (invariant load (s32) from %fixed-stack.6)
+  ; X86-NEXT:   [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD8]](s32), [[LOAD9]](s32)
+  ; X86-NEXT:   [[FRAME_INDEX10:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.5
+  ; X86-NEXT:   [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p0) :: (invariant load (s32) from %fixed-stack.5, align 8)
+  ; X86-NEXT:   [[FRAME_INDEX11:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.4
+  ; X86-NEXT:   [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p0) :: (invariant load (s32) from %fixed-stack.4)
+  ; X86-NEXT:   [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD10]](s32), [[LOAD11]](s32)
+  ; X86-NEXT:   [[FRAME_INDEX12:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3
+  ; X86-NEXT:   [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p0) :: (invariant load (s32) from %fixed-stack.3, align 16)
+  ; X86-NEXT:   [[FRAME_INDEX13:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
+  ; X86-NEXT:   [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p0) :: (invariant load (s32) from %fixed-stack.2)
+  ; X86-NEXT:   [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD12]](s32), [[LOAD13]](s32)
+  ; X86-NEXT:   [[FRAME_INDEX14:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
+  ; X86-NEXT:   [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p0) :: (invariant load (s32) from %fixed-stack.1, align 8)
+  ; X86-NEXT:   [[FRAME_INDEX15:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; X86-NEXT:   [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p0) :: (invariant load (s32) from %fixed-stack.0)
+  ; X86-NEXT:   [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD14]](s32), [[LOAD15]](s32)
+  ; X86-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_64bit
+  ; X86-NEXT:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_64bit
+  ; X86-NEXT:   [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_64bit
+  ; X86-NEXT:   G_STORE [[MV]](s64), [[GV]](p0) :: (store (s64) into @a1_64bit, align 4)
+  ; X86-NEXT:   G_STORE [[MV6]](s64), [[GV1]](p0) :: (store (s64) into @a7_64bit, align 4)
+  ; X86-NEXT:   G_STORE [[MV7]](s64), [[GV2]](p0) :: (store (s64) into @a8_64bit, align 4)
+  ; X86-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
+  ; X86-NEXT:   $eax = COPY [[UV]](s32)
+  ; X86-NEXT:   $edx = COPY [[UV1]](s32)
+  ; X86-NEXT:   RET 0, implicit $eax, implicit $edx
   ; X64-LABEL: name: test_i64_args_8
   ; X64: bb.1.entry:
-  ; X64:   liveins: $rcx, $rdi, $rdx, $rsi, $r8, $r9
-  ; X64:   [[COPY:%[0-9]+]]:_(s64) = COPY $rdi
-  ; X64:   [[COPY1:%[0-9]+]]:_(s64) = COPY $rsi
-  ; X64:   [[COPY2:%[0-9]+]]:_(s64) = COPY $rdx
-  ; X64:   [[COPY3:%[0-9]+]]:_(s64) = COPY $rcx
-  ; X64:   [[COPY4:%[0-9]+]]:_(s64) = COPY $r8
-  ; X64:   [[COPY5:%[0-9]+]]:_(s64) = COPY $r9
-  ; X64:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
-  ; X64:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.1, align 16)
-  ; X64:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; X64:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s64) from %fixed-stack.0)
-  ; X64:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_64bit
-  ; X64:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_64bit
-  ; X64:   [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_64bit
-  ; X64:   G_STORE [[COPY]](s64), [[GV]](p0) :: (store (s64) into @a1_64bit)
-  ; X64:   G_STORE [[LOAD]](s64), [[GV1]](p0) :: (store (s64) into @a7_64bit)
-  ; X64:   G_STORE [[LOAD1]](s64), [[GV2]](p0) :: (store (s64) into @a8_64bit)
-  ; X64:   $rax = COPY [[COPY]](s64)
-  ; X64:   RET 0, implicit $rax
+  ; X64-NEXT:   liveins: $rcx, $rdi, $rdx, $rsi, $r8, $r9
+  ; X64-NEXT: {{  $}}
+  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $rdi
+  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $rsi
+  ; X64-NEXT:   [[COPY2:%[0-9]+]]:_(s64) = COPY $rdx
+  ; X64-NEXT:   [[COPY3:%[0-9]+]]:_(s64) = COPY $rcx
+  ; X64-NEXT:   [[COPY4:%[0-9]+]]:_(s64) = COPY $r8
+  ; X64-NEXT:   [[COPY5:%[0-9]+]]:_(s64) = COPY $r9
+  ; X64-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
+  ; X64-NEXT:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.1, align 16)
+  ; X64-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; X64-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s64) from %fixed-stack.0)
+  ; X64-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_64bit
+  ; X64-NEXT:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_64bit
+  ; X64-NEXT:   [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_64bit
+  ; X64-NEXT:   G_STORE [[COPY]](s64), [[GV]](p0) :: (store (s64) into @a1_64bit)
+  ; X64-NEXT:   G_STORE [[LOAD]](s64), [[GV1]](p0) :: (store (s64) into @a7_64bit)
+  ; X64-NEXT:   G_STORE [[LOAD1]](s64), [[GV2]](p0) :: (store (s64) into @a8_64bit)
+  ; X64-NEXT:   $rax = COPY [[COPY]](s64)
+  ; X64-NEXT:   RET 0, implicit $rax
 entry:
   store i64 %arg1, i64* @a1_64bit
   store i64 %arg7, i64* @a7_64bit
@@ -225,90 +228,96 @@ entry:
 define float @test_float_args(float %arg1, float %arg2) {
   ; X86-LABEL: name: test_float_args
   ; X86: bb.1 (%ir-block.0):
-  ; X86:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
-  ; X86:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.1, align 16)
-  ; X86:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; X86:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.0)
-  ; X86:   $fp0 = COPY [[LOAD1]](s32)
-  ; X86:   RET 0, implicit $fp0
+  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
+  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.1, align 16)
+  ; X86-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; X86-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.0)
+  ; X86-NEXT:   $fp0 = COPY [[LOAD1]](s32)
+  ; X86-NEXT:   RET 0, implicit $fp0
   ; X64-LABEL: name: test_float_args
   ; X64: bb.1 (%ir-block.0):
-  ; X64:   liveins: $xmm0, $xmm1
-  ; X64:   [[COPY:%[0-9]+]]:_(s32) = COPY $xmm0
-  ; X64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $xmm1
-  ; X64:   $xmm0 = COPY [[COPY1]](s32)
-  ; X64:   RET 0, implicit $xmm0
+  ; X64-NEXT:   liveins: $xmm0, $xmm1
+  ; X64-NEXT: {{  $}}
+  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $xmm0
+  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $xmm1
+  ; X64-NEXT:   $xmm0 = COPY [[COPY1]](s32)
+  ; X64-NEXT:   RET 0, implicit $xmm0
   ret float %arg2
 }
 
 define double @test_double_args(double %arg1, double %arg2) {
   ; X86-LABEL: name: test_double_args
   ; X86: bb.1 (%ir-block.0):
-  ; X86:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
-  ; X86:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.1, align 16)
-  ; X86:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; X86:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s64) from %fixed-stack.0)
-  ; X86:   $fp0 = COPY [[LOAD1]](s64)
-  ; X86:   RET 0, implicit $fp0
+  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
+  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.1, align 16)
+  ; X86-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; X86-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s64) from %fixed-stack.0)
+  ; X86-NEXT:   $fp0 = COPY [[LOAD1]](s64)
+  ; X86-NEXT:   RET 0, implicit $fp0
   ; X64-LABEL: name: test_double_args
   ; X64: bb.1 (%ir-block.0):
-  ; X64:   liveins: $xmm0, $xmm1
-  ; X64:   [[COPY:%[0-9]+]]:_(s64) = COPY $xmm0
-  ; X64:   [[COPY1:%[0-9]+]]:_(s64) = COPY $xmm1
-  ; X64:   $xmm0 = COPY [[COPY1]](s64)
-  ; X64:   RET 0, implicit $xmm0
+  ; X64-NEXT:   liveins: $xmm0, $xmm1
+  ; X64-NEXT: {{  $}}
+  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $xmm0
+  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $xmm1
+  ; X64-NEXT:   $xmm0 = COPY [[COPY1]](s64)
+  ; X64-NEXT:   RET 0, implicit $xmm0
   ret double %arg2
 }
 
 define <4 x i32> @test_v4i32_args(<4 x i32> %arg1, <4 x i32> %arg2) {
   ; X86-LABEL: name: test_v4i32_args
   ; X86: bb.1 (%ir-block.0):
-  ; X86:   liveins: $xmm0, $xmm1
-  ; X86:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
-  ; X86:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
-  ; X86:   $xmm0 = COPY [[COPY1]](<4 x s32>)
-  ; X86:   RET 0, implicit $xmm0
+  ; X86-NEXT:   liveins: $xmm0, $xmm1
+  ; X86-NEXT: {{  $}}
+  ; X86-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
+  ; X86-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
+  ; X86-NEXT:   $xmm0 = COPY [[COPY1]](<4 x s32>)
+  ; X86-NEXT:   RET 0, implicit $xmm0
   ; X64-LABEL: name: test_v4i32_args
   ; X64: bb.1 (%ir-block.0):
-  ; X64:   liveins: $xmm0, $xmm1
-  ; X64:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
-  ; X64:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
-  ; X64:   $xmm0 = COPY [[COPY1]](<4 x s32>)
-  ; X64:   RET 0, implicit $xmm0
+  ; X64-NEXT:   liveins: $xmm0, $xmm1
+  ; X64-NEXT: {{  $}}
+  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
+  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
+  ; X64-NEXT:   $xmm0 = COPY [[COPY1]](<4 x s32>)
+  ; X64-NEXT:   RET 0, implicit $xmm0
   ret <4 x i32> %arg2
 }
 
 define <8 x i32> @test_v8i32_args(<8 x i32> %arg1) {
   ; X86-LABEL: name: test_v8i32_args
   ; X86: bb.1 (%ir-block.0):
-  ; X86:   liveins: $xmm0, $xmm1
-  ; X86:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
-  ; X86:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
-  ; X86:   [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
-  ; X86:   [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>)
-  ; X86:   $xmm0 = COPY [[UV]](<4 x s32>)
-  ; X86:   $xmm1 = COPY [[UV1]](<4 x s32>)
-  ; X86:   RET 0, implicit $xmm0, implicit $xmm1
+  ; X86-NEXT:   liveins: $xmm0, $xmm1
+  ; X86-NEXT: {{  $}}
+  ; X86-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
+  ; X86-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
+  ; X86-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
+  ; X86-NEXT:   [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>)
+  ; X86-NEXT:   $xmm0 = COPY [[UV]](<4 x s32>)
+  ; X86-NEXT:   $xmm1 = COPY [[UV1]](<4 x s32>)
+  ; X86-NEXT:   RET 0, implicit $xmm0, implicit $xmm1
   ; X64-LABEL: name: test_v8i32_args
   ; X64: bb.1 (%ir-block.0):
-  ; X64:   liveins: $xmm0, $xmm1
-  ; X64:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
-  ; X64:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
-  ; X64:   [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
-  ; X64:   [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>)
-  ; X64:   $xmm0 = COPY [[UV]](<4 x s32>)
-  ; X64:   $xmm1 = COPY [[UV1]](<4 x s32>)
-  ; X64:   RET 0, implicit $xmm0, implicit $xmm1
+  ; X64-NEXT:   liveins: $xmm0, $xmm1
+  ; X64-NEXT: {{  $}}
+  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
+  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
+  ; X64-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
+  ; X64-NEXT:   [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>)
+  ; X64-NEXT:   $xmm0 = COPY [[UV]](<4 x s32>)
+  ; X64-NEXT:   $xmm1 = COPY [[UV1]](<4 x s32>)
+  ; X64-NEXT:   RET 0, implicit $xmm0, implicit $xmm1
   ret <8 x i32> %arg1
 }
 
 define void @test_void_return() {
   ; X86-LABEL: name: test_void_return
   ; X86: bb.1.entry:
-  ; X86:   RET 0
+  ; X86-NEXT:   RET 0
   ; X64-LABEL: name: test_void_return
   ; X64: bb.1.entry:
-  ; X64:   RET 0
+  ; X64-NEXT:   RET 0
 entry:
   ret void
 }
@@ -316,16 +325,17 @@ entry:
 define i32 * @test_memop_i32(i32 * %p1) {
   ; X86-LABEL: name: test_memop_i32
   ; X86: bb.1 (%ir-block.0):
-  ; X86:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; X86:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.0, align 16)
-  ; X86:   $eax = COPY [[LOAD]](p0)
-  ; X86:   RET 0, implicit $eax
+  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.0, align 16)
+  ; X86-NEXT:   $eax = COPY [[LOAD]](p0)
+  ; X86-NEXT:   RET 0, implicit $eax
   ; X64-LABEL: name: test_memop_i32
   ; X64: bb.1 (%ir-block.0):
-  ; X64:   liveins: $rdi
-  ; X64:   [[COPY:%[0-9]+]]:_(p0) = COPY $rdi
-  ; X64:   $rax = COPY [[COPY]](p0)
-  ; X64:   RET 0, implicit $rax
+  ; X64-NEXT:   liveins: $rdi
+  ; X64-NEXT: {{  $}}
+  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $rdi
+  ; X64-NEXT:   $rax = COPY [[COPY]](p0)
+  ; X64-NEXT:   RET 0, implicit $rax
  ret i32 * %p1
 }
 
@@ -333,16 +343,16 @@ declare void @trivial_callee()
 define void @test_trivial_call() {
   ; X86-LABEL: name: test_trivial_call
   ; X86: bb.1 (%ir-block.0):
-  ; X86:   ADJCALLSTACKDOWN32 0, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   CALLpcrel32 @trivial_callee, csr_32, implicit $esp, implicit $ssp
-  ; X86:   ADJCALLSTACKUP32 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   RET 0
+  ; X86-NEXT:   ADJCALLSTACKDOWN32 0, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   CALLpcrel32 @trivial_callee, csr_32, implicit $esp, implicit $ssp
+  ; X86-NEXT:   ADJCALLSTACKUP32 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   RET 0
   ; X64-LABEL: name: test_trivial_call
   ; X64: bb.1 (%ir-block.0):
-  ; X64:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   CALL64pcrel32 @trivial_callee, csr_64, implicit $rsp, implicit $ssp
-  ; X64:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   RET 0
+  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   CALL64pcrel32 @trivial_callee, csr_64, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   RET 0
   call void @trivial_callee()
   ret void
 }
@@ -351,33 +361,34 @@ declare void @simple_arg_callee(i32 %in0, i32 %in1)
 define void @test_simple_arg(i32 %in0, i32 %in1) {
   ; X86-LABEL: name: test_simple_arg
   ; X86: bb.1 (%ir-block.0):
-  ; X86:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
-  ; X86:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.1, align 16)
-  ; X86:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; X86:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.0)
-  ; X86:   ADJCALLSTACKDOWN32 8, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   [[COPY:%[0-9]+]]:_(p0) = COPY $esp
-  ; X86:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; X86:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
-  ; X86:   G_STORE [[LOAD1]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1)
-  ; X86:   [[COPY1:%[0-9]+]]:_(p0) = COPY $esp
-  ; X86:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; X86:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32)
-  ; X86:   G_STORE [[LOAD]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1)
-  ; X86:   CALLpcrel32 @simple_arg_callee, csr_32, implicit $esp, implicit $ssp
-  ; X86:   ADJCALLSTACKUP32 8, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   RET 0
+  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
+  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.1, align 16)
+  ; X86-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; X86-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.0)
+  ; X86-NEXT:   ADJCALLSTACKDOWN32 8, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $esp
+  ; X86-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; X86-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
+  ; X86-NEXT:   G_STORE [[LOAD1]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1)
+  ; X86-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $esp
+  ; X86-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; X86-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32)
+  ; X86-NEXT:   G_STORE [[LOAD]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1)
+  ; X86-NEXT:   CALLpcrel32 @simple_arg_callee, csr_32, implicit $esp, implicit $ssp
+  ; X86-NEXT:   ADJCALLSTACKUP32 8, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   RET 0
   ; X64-LABEL: name: test_simple_arg
   ; X64: bb.1 (%ir-block.0):
-  ; X64:   liveins: $edi, $esi
-  ; X64:   [[COPY:%[0-9]+]]:_(s32) = COPY $edi
-  ; X64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $esi
-  ; X64:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   $edi = COPY [[COPY1]](s32)
-  ; X64:   $esi = COPY [[COPY]](s32)
-  ; X64:   CALL64pcrel32 @simple_arg_callee, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi
-  ; X64:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   RET 0
+  ; X64-NEXT:   liveins: $edi, $esi
+  ; X64-NEXT: {{  $}}
+  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $edi
+  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $esi
+  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   $edi = COPY [[COPY1]](s32)
+  ; X64-NEXT:   $esi = COPY [[COPY]](s32)
+  ; X64-NEXT:   CALL64pcrel32 @simple_arg_callee, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi
+  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   RET 0
   call void @simple_arg_callee(i32 %in1, i32 %in0)
   ret void
 }
@@ -386,66 +397,67 @@ declare void @simple_arg8_callee(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32
 define void @test_simple_arg8_call(i32 %in0) {
   ; X86-LABEL: name: test_simple_arg8_call
   ; X86: bb.1 (%ir-block.0):
-  ; X86:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; X86:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16)
-  ; X86:   ADJCALLSTACKDOWN32 32, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   [[COPY:%[0-9]+]]:_(p0) = COPY $esp
-  ; X86:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; X86:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
-  ; X86:   G_STORE [[LOAD]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1)
-  ; X86:   [[COPY1:%[0-9]+]]:_(p0) = COPY $esp
-  ; X86:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; X86:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32)
-  ; X86:   G_STORE [[LOAD]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1)
-  ; X86:   [[COPY2:%[0-9]+]]:_(p0) = COPY $esp
-  ; X86:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; X86:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C2]](s32)
-  ; X86:   G_STORE [[LOAD]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 1)
-  ; X86:   [[COPY3:%[0-9]+]]:_(p0) = COPY $esp
-  ; X86:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-  ; X86:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY3]], [[C3]](s32)
-  ; X86:   G_STORE [[LOAD]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12, align 1)
-  ; X86:   [[COPY4:%[0-9]+]]:_(p0) = COPY $esp
-  ; X86:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-  ; X86:   [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C4]](s32)
-  ; X86:   G_STORE [[LOAD]](s32), [[PTR_ADD4]](p0) :: (store (s32) into stack + 16, align 1)
-  ; X86:   [[COPY5:%[0-9]+]]:_(p0) = COPY $esp
-  ; X86:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; X86:   [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY5]], [[C5]](s32)
-  ; X86:   G_STORE [[LOAD]](s32), [[PTR_ADD5]](p0) :: (store (s32) into stack + 20, align 1)
-  ; X86:   [[COPY6:%[0-9]+]]:_(p0) = COPY $esp
-  ; X86:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-  ; X86:   [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY6]], [[C6]](s32)
-  ; X86:   G_STORE [[LOAD]](s32), [[PTR_ADD6]](p0) :: (store (s32) into stack + 24, align 1)
-  ; X86:   [[COPY7:%[0-9]+]]:_(p0) = COPY $esp
-  ; X86:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-  ; X86:   [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY7]], [[C7]](s32)
-  ; X86:   G_STORE [[LOAD]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28, align 1)
-  ; X86:   CALLpcrel32 @simple_arg8_callee, csr_32, implicit $esp, implicit $ssp
-  ; X86:   ADJCALLSTACKUP32 32, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   RET 0
+  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16)
+  ; X86-NEXT:   ADJCALLSTACKDOWN32 32, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $esp
+  ; X86-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; X86-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
+  ; X86-NEXT:   G_STORE [[LOAD]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1)
+  ; X86-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $esp
+  ; X86-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; X86-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32)
+  ; X86-NEXT:   G_STORE [[LOAD]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1)
+  ; X86-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $esp
+  ; X86-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; X86-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C2]](s32)
+  ; X86-NEXT:   G_STORE [[LOAD]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 1)
+  ; X86-NEXT:   [[COPY3:%[0-9]+]]:_(p0) = COPY $esp
+  ; X86-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+  ; X86-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY3]], [[C3]](s32)
+  ; X86-NEXT:   G_STORE [[LOAD]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12, align 1)
+  ; X86-NEXT:   [[COPY4:%[0-9]+]]:_(p0) = COPY $esp
+  ; X86-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; X86-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C4]](s32)
+  ; X86-NEXT:   G_STORE [[LOAD]](s32), [[PTR_ADD4]](p0) :: (store (s32) into stack + 16, align 1)
+  ; X86-NEXT:   [[COPY5:%[0-9]+]]:_(p0) = COPY $esp
+  ; X86-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; X86-NEXT:   [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY5]], [[C5]](s32)
+  ; X86-NEXT:   G_STORE [[LOAD]](s32), [[PTR_ADD5]](p0) :: (store (s32) into stack + 20, align 1)
+  ; X86-NEXT:   [[COPY6:%[0-9]+]]:_(p0) = COPY $esp
+  ; X86-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+  ; X86-NEXT:   [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY6]], [[C6]](s32)
+  ; X86-NEXT:   G_STORE [[LOAD]](s32), [[PTR_ADD6]](p0) :: (store (s32) into stack + 24, align 1)
+  ; X86-NEXT:   [[COPY7:%[0-9]+]]:_(p0) = COPY $esp
+  ; X86-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+  ; X86-NEXT:   [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY7]], [[C7]](s32)
+  ; X86-NEXT:   G_STORE [[LOAD]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28, align 1)
+  ; X86-NEXT:   CALLpcrel32 @simple_arg8_callee, csr_32, implicit $esp, implicit $ssp
+  ; X86-NEXT:   ADJCALLSTACKUP32 32, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   RET 0
   ; X64-LABEL: name: test_simple_arg8_call
   ; X64: bb.1 (%ir-block.0):
-  ; X64:   liveins: $edi
-  ; X64:   [[COPY:%[0-9]+]]:_(s32) = COPY $edi
-  ; X64:   ADJCALLSTACKDOWN64 16, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   $edi = COPY [[COPY]](s32)
-  ; X64:   $esi = COPY [[COPY]](s32)
-  ; X64:   $edx = COPY [[COPY]](s32)
-  ; X64:   $ecx = COPY [[COPY]](s32)
-  ; X64:   $r8d = COPY [[COPY]](s32)
-  ; X64:   $r9d = COPY [[COPY]](s32)
-  ; X64:   [[COPY1:%[0-9]+]]:_(p0) = COPY $rsp
-  ; X64:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; X64:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
-  ; X64:   G_STORE [[COPY]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1)
-  ; X64:   [[COPY2:%[0-9]+]]:_(p0) = COPY $rsp
-  ; X64:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; X64:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C1]](s64)
-  ; X64:   G_STORE [[COPY]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 8, align 1)
-  ; X64:   CALL64pcrel32 @simple_arg8_callee, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit $edx, implicit $ecx, implicit $r8d, implicit $r9d
-  ; X64:   ADJCALLSTACKUP64 16, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   RET 0
+  ; X64-NEXT:   liveins: $edi
+  ; X64-NEXT: {{  $}}
+  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $edi
+  ; X64-NEXT:   ADJCALLSTACKDOWN64 16, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   $edi = COPY [[COPY]](s32)
+  ; X64-NEXT:   $esi = COPY [[COPY]](s32)
+  ; X64-NEXT:   $edx = COPY [[COPY]](s32)
+  ; X64-NEXT:   $ecx = COPY [[COPY]](s32)
+  ; X64-NEXT:   $r8d = COPY [[COPY]](s32)
+  ; X64-NEXT:   $r9d = COPY [[COPY]](s32)
+  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $rsp
+  ; X64-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; X64-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
+  ; X64-NEXT:   G_STORE [[COPY]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1)
+  ; X64-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $rsp
+  ; X64-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; X64-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C1]](s64)
+  ; X64-NEXT:   G_STORE [[COPY]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 8, align 1)
+  ; X64-NEXT:   CALL64pcrel32 @simple_arg8_callee, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit $edx, implicit $ecx, implicit $r8d, implicit $r9d
+  ; X64-NEXT:   ADJCALLSTACKUP64 16, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   RET 0
  call void @simple_arg8_callee(i32 %in0, i32 %in0, i32 %in0, i32 %in0, i32 %in0, i32 %in0, i32 %in0, i32 %in0)
   ret void
 }
@@ -454,29 +466,29 @@ declare i32 @simple_return_callee(i32 %in0)
 define i32 @test_simple_return_callee() {
   ; X86-LABEL: name: test_simple_return_callee
   ; X86: bb.1 (%ir-block.0):
-  ; X86:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-  ; X86:   ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   [[COPY:%[0-9]+]]:_(p0) = COPY $esp
-  ; X86:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; X86:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s32)
-  ; X86:   G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1)
-  ; X86:   CALLpcrel32 @simple_return_callee, csr_32, implicit $esp, implicit $ssp, implicit-def $eax
-  ; X86:   [[COPY1:%[0-9]+]]:_(s32) = COPY $eax
-  ; X86:   ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY1]]
-  ; X86:   $eax = COPY [[ADD]](s32)
-  ; X86:   RET 0, implicit $eax
+  ; X86-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+  ; X86-NEXT:   ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $esp
+  ; X86-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; X86-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s32)
+  ; X86-NEXT:   G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1)
+  ; X86-NEXT:   CALLpcrel32 @simple_return_callee, csr_32, implicit $esp, implicit $ssp, implicit-def $eax
+  ; X86-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $eax
+  ; X86-NEXT:   ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY1]]
+  ; X86-NEXT:   $eax = COPY [[ADD]](s32)
+  ; X86-NEXT:   RET 0, implicit $eax
   ; X64-LABEL: name: test_simple_return_callee
   ; X64: bb.1 (%ir-block.0):
-  ; X64:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-  ; X64:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   $edi = COPY [[C]](s32)
-  ; X64:   CALL64pcrel32 @simple_return_callee, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit-def $eax
-  ; X64:   [[COPY:%[0-9]+]]:_(s32) = COPY $eax
-  ; X64:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]]
-  ; X64:   $eax = COPY [[ADD]](s32)
-  ; X64:   RET 0, implicit $eax
+  ; X64-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   $edi = COPY [[C]](s32)
+  ; X64-NEXT:   CALL64pcrel32 @simple_return_callee, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit-def $eax
+  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $eax
+  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]]
+  ; X64-NEXT:   $eax = COPY [[ADD]](s32)
+  ; X64-NEXT:   RET 0, implicit $eax
   %call = call i32 @simple_return_callee(i32 5)
   %r = add i32 %call, %call
   ret i32 %r
@@ -486,51 +498,53 @@ declare <8 x i32> @split_return_callee(<8 x i32> %in0)
 define <8 x i32> @test_split_return_callee(<8 x i32> %arg1, <8 x i32> %arg2) {
   ; X86-LABEL: name: test_split_return_callee
   ; X86: bb.1 (%ir-block.0):
-  ; X86:   liveins: $xmm0, $xmm1, $xmm2
-  ; X86:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
-  ; X86:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
-  ; X86:   [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
-  ; X86:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $xmm2
-  ; X86:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; X86:   [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s32>) from %fixed-stack.0)
-  ; X86:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY2]](<4 x s32>), [[LOAD]](<4 x s32>)
-  ; X86:   ADJCALLSTACKDOWN32 0, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<8 x s32>)
-  ; X86:   $xmm0 = COPY [[UV]](<4 x s32>)
-  ; X86:   $xmm1 = COPY [[UV1]](<4 x s32>)
-  ; X86:   CALLpcrel32 @split_return_callee, csr_32, implicit $esp, implicit $ssp, implicit $xmm0, implicit $xmm1, implicit-def $xmm0, implicit-def $xmm1
-  ; X86:   [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
-  ; X86:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
-  ; X86:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY3]](<4 x s32>), [[COPY4]](<4 x s32>)
-  ; X86:   ADJCALLSTACKUP32 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   [[ADD:%[0-9]+]]:_(<8 x s32>) = G_ADD [[CONCAT_VECTORS]], [[CONCAT_VECTORS2]]
-  ; X86:   [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[ADD]](<8 x s32>)
-  ; X86:   $xmm0 = COPY [[UV2]](<4 x s32>)
-  ; X86:   $xmm1 = COPY [[UV3]](<4 x s32>)
-  ; X86:   RET 0, implicit $xmm0, implicit $xmm1
+  ; X86-NEXT:   liveins: $xmm0, $xmm1, $xmm2
+  ; X86-NEXT: {{  $}}
+  ; X86-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
+  ; X86-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
+  ; X86-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
+  ; X86-NEXT:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $xmm2
+  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s32>) from %fixed-stack.0)
+  ; X86-NEXT:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY2]](<4 x s32>), [[LOAD]](<4 x s32>)
+  ; X86-NEXT:   ADJCALLSTACKDOWN32 0, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<8 x s32>)
+  ; X86-NEXT:   $xmm0 = COPY [[UV]](<4 x s32>)
+  ; X86-NEXT:   $xmm1 = COPY [[UV1]](<4 x s32>)
+  ; X86-NEXT:   CALLpcrel32 @split_return_callee, csr_32, implicit $esp, implicit $ssp, implicit $xmm0, implicit $xmm1, implicit-def $xmm0, implicit-def $xmm1
+  ; X86-NEXT:   [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
+  ; X86-NEXT:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
+  ; X86-NEXT:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY3]](<4 x s32>), [[COPY4]](<4 x s32>)
+  ; X86-NEXT:   ADJCALLSTACKUP32 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   [[ADD:%[0-9]+]]:_(<8 x s32>) = G_ADD [[CONCAT_VECTORS]], [[CONCAT_VECTORS2]]
+  ; X86-NEXT:   [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[ADD]](<8 x s32>)
+  ; X86-NEXT:   $xmm0 = COPY [[UV2]](<4 x s32>)
+  ; X86-NEXT:   $xmm1 = COPY [[UV3]](<4 x s32>)
+  ; X86-NEXT:   RET 0, implicit $xmm0, implicit $xmm1
   ; X64-LABEL: name: test_split_return_callee
   ; X64: bb.1 (%ir-block.0):
-  ; X64:   liveins: $xmm0, $xmm1, $xmm2, $xmm3
-  ; X64:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
-  ; X64:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
-  ; X64:   [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
-  ; X64:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $xmm2
-  ; X64:   [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $xmm3
-  ; X64:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY2]](<4 x s32>), [[COPY3]](<4 x s32>)
-  ; X64:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<8 x s32>)
-  ; X64:   $xmm0 = COPY [[UV]](<4 x s32>)
-  ; X64:   $xmm1 = COPY [[UV1]](<4 x s32>)
-  ; X64:   CALL64pcrel32 @split_return_callee, csr_64, implicit $rsp, implicit $ssp, implicit $xmm0, implicit $xmm1, implicit-def $xmm0, implicit-def $xmm1
-  ; X64:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
-  ; X64:   [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
-  ; X64:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY4]](<4 x s32>), [[COPY5]](<4 x s32>)
-  ; X64:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   [[ADD:%[0-9]+]]:_(<8 x s32>) = G_ADD [[CONCAT_VECTORS]], [[CONCAT_VECTORS2]]
-  ; X64:   [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[ADD]](<8 x s32>)
-  ; X64:   $xmm0 = COPY [[UV2]](<4 x s32>)
-  ; X64:   $xmm1 = COPY [[UV3]](<4 x s32>)
-  ; X64:   RET 0, implicit $xmm0, implicit $xmm1
+  ; X64-NEXT:   liveins: $xmm0, $xmm1, $xmm2, $xmm3
+  ; X64-NEXT: {{  $}}
+  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
+  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
+  ; X64-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
+  ; X64-NEXT:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $xmm2
+  ; X64-NEXT:   [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $xmm3
+  ; X64-NEXT:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY2]](<4 x s32>), [[COPY3]](<4 x s32>)
+  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<8 x s32>)
+  ; X64-NEXT:   $xmm0 = COPY [[UV]](<4 x s32>)
+  ; X64-NEXT:   $xmm1 = COPY [[UV1]](<4 x s32>)
+  ; X64-NEXT:   CALL64pcrel32 @split_return_callee, csr_64, implicit $rsp, implicit $ssp, implicit $xmm0, implicit $xmm1, implicit-def $xmm0, implicit-def $xmm1
+  ; X64-NEXT:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
+  ; X64-NEXT:   [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
+  ; X64-NEXT:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY4]](<4 x s32>), [[COPY5]](<4 x s32>)
+  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   [[ADD:%[0-9]+]]:_(<8 x s32>) = G_ADD [[CONCAT_VECTORS]], [[CONCAT_VECTORS2]]
+  ; X64-NEXT:   [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[ADD]](<8 x s32>)
+  ; X64-NEXT:   $xmm0 = COPY [[UV2]](<4 x s32>)
+  ; X64-NEXT:   $xmm1 = COPY [[UV3]](<4 x s32>)
+  ; X64-NEXT:   RET 0, implicit $xmm0, implicit $xmm1
   %call = call <8 x i32> @split_return_callee(<8 x i32> %arg2)
   %r = add <8 x i32> %arg1, %call
  ret <8 x i32> %r
@@ -539,20 +553,21 @@ define <8 x i32> @test_split_return_callee(<8 x i32> %arg1, <8 x i32> %arg2) {
 define void @test_indirect_call(void()* %func) {
   ; X86-LABEL: name: test_indirect_call
   ; X86: bb.1 (%ir-block.0):
-  ; X86:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; X86:   [[LOAD:%[0-9]+]]:gr32(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.0, align 16)
-  ; X86:   ADJCALLSTACKDOWN32 0, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   CALL32r [[LOAD]](p0), csr_32, implicit $esp, implicit $ssp
-  ; X86:   ADJCALLSTACKUP32 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   RET 0
+  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; X86-NEXT:   [[LOAD:%[0-9]+]]:gr32(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.0, align 16)
+  ; X86-NEXT:   ADJCALLSTACKDOWN32 0, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   CALL32r [[LOAD]](p0), csr_32, implicit $esp, implicit $ssp
+  ; X86-NEXT:   ADJCALLSTACKUP32 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   RET 0
   ; X64-LABEL: name: test_indirect_call
   ; X64: bb.1 (%ir-block.0):
-  ; X64:   liveins: $rdi
-  ; X64:   [[COPY:%[0-9]+]]:gr64(p0) = COPY $rdi
-  ; X64:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   CALL64r [[COPY]](p0), csr_64, implicit $rsp, implicit $ssp
-  ; X64:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   RET 0
+  ; X64-NEXT:   liveins: $rdi
+  ; X64-NEXT: {{  $}}
+  ; X64-NEXT:   [[COPY:%[0-9]+]]:gr64(p0) = COPY $rdi
+  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   CALL64r [[COPY]](p0), csr_64, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   RET 0
   call void %func()
   ret void
 }
@@ -562,53 +577,54 @@ declare void @take_char(i8)
 define void @test_abi_exts_call(i8* %addr) {
   ; X86-LABEL: name: test_abi_exts_call
   ; X86: bb.1 (%ir-block.0):
-  ; X86:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; X86:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.0, align 16)
-  ; X86:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p0) :: (load (s8) from %ir.addr)
-  ; X86:   ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   [[COPY:%[0-9]+]]:_(p0) = COPY $esp
-  ; X86:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; X86:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
-  ; X86:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
-  ; X86:   G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p0) :: (store (s8) into stack)
-  ; X86:   CALLpcrel32 @take_char, csr_32, implicit $esp, implicit $ssp
-  ; X86:   ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   [[COPY1:%[0-9]+]]:_(p0) = COPY $esp
-  ; X86:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s32)
-  ; X86:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8)
-  ; X86:   G_STORE [[SEXT]](s32), [[PTR_ADD1]](p0) :: (store (s8) into stack)
-  ; X86:   CALLpcrel32 @take_char, csr_32, implicit $esp, implicit $ssp
-  ; X86:   ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   [[COPY2:%[0-9]+]]:_(p0) = COPY $esp
-  ; X86:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s32)
-  ; X86:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
-  ; X86:   G_STORE [[ZEXT]](s32), [[PTR_ADD2]](p0) :: (store (s8) into stack)
-  ; X86:   CALLpcrel32 @take_char, csr_32, implicit $esp, implicit $ssp
-  ; X86:   ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   RET 0
+  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.0, align 16)
+  ; X86-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p0) :: (load (s8) from %ir.addr)
+  ; X86-NEXT:   ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $esp
+  ; X86-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; X86-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
+  ; X86-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
+  ; X86-NEXT:   G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p0) :: (store (s8) into stack)
+  ; X86-NEXT:   CALLpcrel32 @take_char, csr_32, implicit $esp, implicit $ssp
+  ; X86-NEXT:   ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $esp
+  ; X86-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s32)
+  ; X86-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8)
+  ; X86-NEXT:   G_STORE [[SEXT]](s32), [[PTR_ADD1]](p0) :: (store (s8) into stack)
+  ; X86-NEXT:   CALLpcrel32 @take_char, csr_32, implicit $esp, implicit $ssp
+  ; X86-NEXT:   ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $esp
+  ; X86-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s32)
+  ; X86-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
+  ; X86-NEXT:   G_STORE [[ZEXT]](s32), [[PTR_ADD2]](p0) :: (store (s8) into stack)
+  ; X86-NEXT:   CALLpcrel32 @take_char, csr_32, implicit $esp, implicit $ssp
+  ; X86-NEXT:   ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   RET 0
   ; X64-LABEL: name: test_abi_exts_call
   ; X64: bb.1 (%ir-block.0):
-  ; X64:   liveins: $rdi
-  ; X64:   [[COPY:%[0-9]+]]:_(p0) = COPY $rdi
-  ; X64:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.addr)
-  ; X64:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8)
-  ; X64:   $edi = COPY [[ANYEXT]](s32)
-  ; X64:   CALL64pcrel32 @take_char, csr_64, implicit $rsp, implicit $ssp, implicit $edi
-  ; X64:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8)
-  ; X64:   $edi = COPY [[SEXT]](s32)
-  ; X64:   CALL64pcrel32 @take_char, csr_64, implicit $rsp, implicit $ssp, implicit $edi
-  ; X64:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8)
-  ; X64:   $edi = COPY [[ZEXT]](s32)
-  ; X64:   CALL64pcrel32 @take_char, csr_64, implicit $rsp, implicit $ssp, implicit $edi
-  ; X64:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   RET 0
+  ; X64-NEXT:   liveins: $rdi
+  ; X64-NEXT: {{  $}}
+  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $rdi
+  ; X64-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.addr)
+  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8)
+  ; X64-NEXT:   $edi = COPY [[ANYEXT]](s32)
+  ; X64-NEXT:   CALL64pcrel32 @take_char, csr_64, implicit $rsp, implicit $ssp, implicit $edi
+  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8)
+  ; X64-NEXT:   $edi = COPY [[SEXT]](s32)
+  ; X64-NEXT:   CALL64pcrel32 @take_char, csr_64, implicit $rsp, implicit $ssp, implicit $edi
+  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8)
+  ; X64-NEXT:   $edi = COPY [[ZEXT]](s32)
+  ; X64-NEXT:   CALL64pcrel32 @take_char, csr_64, implicit $rsp, implicit $ssp, implicit $edi
+  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   RET 0
   %val = load i8, i8* %addr
   call void @take_char(i8 %val)
   call void @take_char(i8 signext %val)
@@ -620,38 +636,39 @@ declare void @variadic_callee(i8*, ...)
 define void @test_variadic_call_1(i8** %addr_ptr, i32* %val_ptr) {
   ; X86-LABEL: name: test_variadic_call_1
   ; X86: bb.1 (%ir-block.0):
-  ; X86:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
-  ; X86:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1, align 16)
-  ; X86:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; X86:   [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0)
-  ; X86:   [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[LOAD]](p0) :: (load (p0) from %ir.addr_ptr)
-  ; X86:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[LOAD1]](p0) :: (load (s32) from %ir.val_ptr)
-  ; X86:   ADJCALLSTACKDOWN32 8, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   [[COPY:%[0-9]+]]:_(p0) = COPY $esp
-  ; X86:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; X86:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
-  ; X86:   G_STORE [[LOAD2]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
-  ; X86:   [[COPY1:%[0-9]+]]:_(p0) = COPY $esp
-  ; X86:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; X86:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32)
-  ; X86:   G_STORE [[LOAD3]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1)
-  ; X86:   CALLpcrel32 @variadic_callee, csr_32, implicit $esp, implicit $ssp
-  ; X86:   ADJCALLSTACKUP32 8, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   RET 0
+  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
+  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1, align 16)
+  ; X86-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; X86-NEXT:   [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0)
+  ; X86-NEXT:   [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[LOAD]](p0) :: (load (p0) from %ir.addr_ptr)
+  ; X86-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[LOAD1]](p0) :: (load (s32) from %ir.val_ptr)
+  ; X86-NEXT:   ADJCALLSTACKDOWN32 8, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $esp
+  ; X86-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; X86-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
+  ; X86-NEXT:   G_STORE [[LOAD2]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
+  ; X86-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $esp
+  ; X86-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; X86-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32)
+  ; X86-NEXT:   G_STORE [[LOAD3]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1)
+  ; X86-NEXT:   CALLpcrel32 @variadic_callee, csr_32, implicit $esp, implicit $ssp
+  ; X86-NEXT:   ADJCALLSTACKUP32 8, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   RET 0
   ; X64-LABEL: name: test_variadic_call_1
   ; X64: bb.1 (%ir-block.0):
-  ; X64:   liveins: $rdi, $rsi
-  ; X64:   [[COPY:%[0-9]+]]:_(p0) = COPY $rdi
-  ; X64:   [[COPY1:%[0-9]+]]:_(p0) = COPY $rsi
-  ; X64:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load (p0) from %ir.addr_ptr)
-  ; X64:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s32) from %ir.val_ptr)
-  ; X64:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   $rdi = COPY [[LOAD]](p0)
-  ; X64:   $esi = COPY [[LOAD1]](s32)
-  ; X64:   $al = MOV8ri 0
-  ; X64:   CALL64pcrel32 @variadic_callee, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $esi, implicit $al
-  ; X64:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   RET 0
+  ; X64-NEXT:   liveins: $rdi, $rsi
+  ; X64-NEXT: {{  $}}
+  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $rdi
+  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $rsi
+  ; X64-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load (p0) from %ir.addr_ptr)
+  ; X64-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s32) from %ir.val_ptr)
+  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   $rdi = COPY [[LOAD]](p0)
+  ; X64-NEXT:   $esi = COPY [[LOAD1]](s32)
+  ; X64-NEXT:   $al = MOV8ri 0
+  ; X64-NEXT:   CALL64pcrel32 @variadic_callee, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $esi, implicit $al
+  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   RET 0
   %addr = load i8*, i8** %addr_ptr
   %val = load i32, i32* %val_ptr
   call void (i8*, ...) @variadic_callee(i8* %addr, i32 %val)
@@ -661,38 +678,39 @@ define void @test_variadic_call_1(i8** %addr_ptr, i32* %val_ptr) {
 define void @test_variadic_call_2(i8** %addr_ptr, double* %val_ptr) {
   ; X86-LABEL: name: test_variadic_call_2
   ; X86: bb.1 (%ir-block.0):
-  ; X86:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
-  ; X86:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1, align 16)
-  ; X86:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; X86:   [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0)
-  ; X86:   [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[LOAD]](p0) :: (load (p0) from %ir.addr_ptr)
-  ; X86:   [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[LOAD1]](p0) :: (load (s64) from %ir.val_ptr, align 4)
-  ; X86:   ADJCALLSTACKDOWN32 12, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   [[COPY:%[0-9]+]]:_(p0) = COPY $esp
-  ; X86:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; X86:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
-  ; X86:   G_STORE [[LOAD2]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
-  ; X86:   [[COPY1:%[0-9]+]]:_(p0) = COPY $esp
-  ; X86:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; X86:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32)
-  ; X86:   G_STORE [[LOAD3]](s64), [[PTR_ADD1]](p0) :: (store (s64) into stack + 4, align 1)
-  ; X86:   CALLpcrel32 @variadic_callee, csr_32, implicit $esp, implicit $ssp
-  ; X86:   ADJCALLSTACKUP32 12, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
-  ; X86:   RET 0
+  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
+  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1, align 16)
+  ; X86-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; X86-NEXT:   [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0)
+  ; X86-NEXT:   [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[LOAD]](p0) :: (load (p0) from %ir.addr_ptr)
+  ; X86-NEXT:   [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[LOAD1]](p0) :: (load (s64) from %ir.val_ptr, align 4)
+  ; X86-NEXT:   ADJCALLSTACKDOWN32 12, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $esp
+  ; X86-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; X86-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
+  ; X86-NEXT:   G_STORE [[LOAD2]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
+  ; X86-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $esp
+  ; X86-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; X86-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32)
+  ; X86-NEXT:   G_STORE [[LOAD3]](s64), [[PTR_ADD1]](p0) :: (store (s64) into stack + 4, align 1)
+  ; X86-NEXT:   CALLpcrel32 @variadic_callee, csr_32, implicit $esp, implicit $ssp
+  ; X86-NEXT:   ADJCALLSTACKUP32 12, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   RET 0
   ; X64-LABEL: name: test_variadic_call_2
   ; X64: bb.1 (%ir-block.0):
-  ; X64:   liveins: $rdi, $rsi
-  ; X64:   [[COPY:%[0-9]+]]:_(p0) = COPY $rdi
-  ; X64:   [[COPY1:%[0-9]+]]:_(p0) = COPY $rsi
-  ; X64:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load (p0) from %ir.addr_ptr)
-  ; X64:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p0) :: (load (s64) from %ir.val_ptr)
-  ; X64:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   $rdi = COPY [[LOAD]](p0)
-  ; X64:   $xmm0 = COPY [[LOAD1]](s64)
-  ; X64:   $al = MOV8ri 1
-  ; X64:   CALL64pcrel32 @variadic_callee, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $xmm0, implicit $al
-  ; X64:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
-  ; X64:   RET 0
+  ; X64-NEXT:   liveins: $rdi, $rsi
+  ; X64-NEXT: {{  $}}
+  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $rdi
+  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $rsi
+  ; X64-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load (p0) from %ir.addr_ptr)
+  ; X64-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p0) :: (load (s64) from %ir.val_ptr)
+  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   $rdi = COPY [[LOAD]](p0)
+  ; X64-NEXT:   $xmm0 = COPY [[LOAD1]](s64)
+  ; X64-NEXT:   $al = MOV8ri 1
+  ; X64-NEXT:   CALL64pcrel32 @variadic_callee, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $xmm0, implicit $al
+  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   RET 0
   %addr = load i8*, i8** %addr_ptr
   %val = load double, double* %val_ptr
   call void (i8*, ...) @variadic_callee(i8* %addr, double %val)