[llvm] 37a203f - AMDGPU: Regenerate more mir test checks with -NEXT

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Sat Dec 18 08:39:16 PST 2021


Author: Matt Arsenault
Date: 2021-12-18T11:38:30-05:00
New Revision: 37a203f63e7bc6da744babac94841e860e74bbab

URL: https://github.com/llvm/llvm-project/commit/37a203f63e7bc6da744babac94841e860e74bbab
DIFF: https://github.com/llvm/llvm-project/commit/37a203f63e7bc6da744babac94841e860e74bbab.diff

LOG: AMDGPU: Regenerate more mir test checks with -NEXT

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fence.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll
index 6c6a6680d60f..6487cba23663 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll
@@ -6,10 +6,11 @@ declare void @llvm.amdgcn.s.sendmsg(i32 immarg, i32)
 define amdgpu_ps void @test_sendmsg(i32 inreg %m0) {
   ; CHECK-LABEL: name: test_sendmsg
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
-  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 12, [[COPY]](s32)
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+  ; CHECK-NEXT:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 12, [[COPY]](s32)
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @llvm.amdgcn.s.sendmsg(i32 12, i32 %m0)
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
index e0f556a3cc18..8992621229a8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
@@ -5,30 +5,32 @@
 define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
   ; HSA-VI-LABEL: name: i8_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
-  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
-  ; HSA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
+  ; HSA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
+  ; HSA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: i8_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
-  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %ext = zext i8 %in to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
   ret void
@@ -37,30 +39,32 @@ define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) noun
 define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
   ; HSA-VI-LABEL: name: i8_zext_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
-  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
-  ; HSA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
+  ; HSA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
+  ; HSA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: i8_zext_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
-  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %ext = zext i8 %in to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
   ret void
@@ -69,30 +73,32 @@ define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zero
 define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
   ; HSA-VI-LABEL: name: i8_sext_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
-  ; HSA-VI:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8)
-  ; HSA-VI:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
+  ; HSA-VI-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8)
+  ; HSA-VI-NEXT:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: i8_sext_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8)
-  ; LEGACY-MESA-VI:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %ext = sext i8 %in to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
   ret void
@@ -101,30 +107,32 @@ define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 sign
 define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
   ; HSA-VI-LABEL: name: i16_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4)
-  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
-  ; HSA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4)
+  ; HSA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
+  ; HSA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: i16_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
-  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %ext = zext i16 %in to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
   ret void
@@ -133,30 +141,32 @@ define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) no
 define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
   ; HSA-VI-LABEL: name: i16_zext_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4)
-  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
-  ; HSA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4)
+  ; HSA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
+  ; HSA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: i16_zext_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
-  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %ext = zext i16 %in to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
   ret void
@@ -165,30 +175,32 @@ define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 ze
 define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
   ; HSA-VI-LABEL: name: i16_sext_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4)
-  ; HSA-VI:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16)
-  ; HSA-VI:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4)
+  ; HSA-VI-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16)
+  ; HSA-VI-NEXT:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: i16_sext_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16)
-  ; LEGACY-MESA-VI:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %ext = sext i16 %in to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
   ret void
@@ -197,28 +209,30 @@ define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 si
 define amdgpu_kernel void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
   ; HSA-VI-LABEL: name: i32_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: i32_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store i32 %in, i32 addrspace(1)* %out, align 4
   ret void
@@ -227,28 +241,30 @@ entry:
 define amdgpu_kernel void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
   ; HSA-VI-LABEL: name: f32_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: f32_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store float %in, float addrspace(1)* %out, align 4
   ret void
@@ -257,28 +273,30 @@ entry:
 define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
   ; HSA-VI-LABEL: name: v2i8_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 8, addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 8, addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v2i8_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store <2 x i8> %in, <2 x i8> addrspace(1)* %out
   ret void
@@ -287,28 +305,30 @@ entry:
 define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
   ; HSA-VI-LABEL: name: v2i16_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), align 8, addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), align 8, addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v2i16_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store <2 x i16> %in, <2 x i16> addrspace(1)* %out
   ret void
@@ -317,28 +337,30 @@ entry:
 define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
   ; HSA-VI-LABEL: name: v2i32_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v2i32_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
   ret void
@@ -347,28 +369,30 @@ entry:
 define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
   ; HSA-VI-LABEL: name: v2f32_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v2f32_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
   ret void
@@ -377,28 +401,30 @@ entry:
 define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
   ; HSA-VI-LABEL: name: v3i8_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 8, addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 8, addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v3i8_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
   ret void
@@ -407,28 +433,30 @@ entry:
 define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
   ; HSA-VI-LABEL: name: v3i16_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 8, addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 8, addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v3i16_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
   ret void
@@ -437,28 +465,30 @@ entry:
 define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
   ; HSA-VI-LABEL: name: v3i32_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v3i32_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
   ret void
@@ -467,28 +497,30 @@ entry:
 define amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
   ; HSA-VI-LABEL: name: v3f32_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v3f32_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
   ret void
@@ -497,28 +529,30 @@ entry:
 define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
   ; HSA-VI-LABEL: name: v4i8_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), align 8, addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), align 8, addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v4i8_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store <4 x i8> %in, <4 x i8> addrspace(1)* %out
   ret void
@@ -527,28 +561,30 @@ entry:
 define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
   ; HSA-VI-LABEL: name: v4i16_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s16>), addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s16>), addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v4i16_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s16>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s16>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store <4 x i16> %in, <4 x i16> addrspace(1)* %out
   ret void
@@ -557,28 +593,30 @@ entry:
 define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
   ; HSA-VI-LABEL: name: v4i32_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v4i32_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
   ret void
@@ -587,28 +625,30 @@ entry:
 define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
   ; HSA-VI-LABEL: name: v4f32_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v4f32_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
   ret void
@@ -617,28 +657,30 @@ entry:
 define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
   ; HSA-VI-LABEL: name: v8i8_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v8i8_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store <8 x i8> %in, <8 x i8> addrspace(1)* %out
   ret void
@@ -647,28 +689,30 @@ entry:
 define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
   ; HSA-VI-LABEL: name: v8i16_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v8i16_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store <8 x i16> %in, <8 x i16> addrspace(1)* %out
   ret void
@@ -677,28 +721,30 @@ entry:
 define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
   ; HSA-VI-LABEL: name: v8i32_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v8i32_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
   ret void
@@ -707,28 +753,30 @@ entry:
 define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
   ; HSA-VI-LABEL: name: v8f32_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v8f32_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
   ret void
@@ -737,28 +785,30 @@ entry:
 define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
   ; HSA-VI-LABEL: name: v16i8_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v16i8_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store <16 x i8> %in, <16 x i8> addrspace(1)* %out
   ret void
@@ -767,28 +817,30 @@ entry:
 define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
   ; HSA-VI-LABEL: name: v16i16_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 16, addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v16i16_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store <16 x i16> %in, <16 x i16> addrspace(1)* %out
   ret void
@@ -797,28 +849,30 @@ entry:
 define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
   ; HSA-VI-LABEL: name: v16i32_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v16i32_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
   ret void
@@ -827,28 +881,30 @@ entry:
 define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
   ; HSA-VI-LABEL: name: v16f32_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v16f32_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
   ret void
@@ -857,28 +913,30 @@ entry:
 define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
   ; HSA-VI-LABEL: name: kernel_arg_i64
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: kernel_arg_i64
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   store i64 %a, i64 addrspace(1)* %out, align 8
   ret void
 }
@@ -886,28 +944,30 @@ define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwi
 define amdgpu_kernel void @f64_kernel_arg(double addrspace(1)* %out, double  %in) {
   ; HSA-VI-LABEL: name: f64_kernel_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: f64_kernel_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   store double %in, double addrspace(1)* %out
   ret void
@@ -916,28 +976,30 @@ entry:
 define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
   ; HSA-VI-LABEL: name: i1_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: i1_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   store i1 %x, i1 addrspace(1)* %out, align 1
   ret void
 }
@@ -945,30 +1007,32 @@ define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
 define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
   ; HSA-VI-LABEL: name: i1_arg_zext_i32
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
-  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1)
-  ; HSA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
+  ; HSA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1)
+  ; HSA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: i1_arg_zext_i32
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1)
-  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %ext = zext i1 %x to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
   ret void
@@ -977,30 +1041,32 @@ define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwi
 define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
   ; HSA-VI-LABEL: name: i1_arg_zext_i64
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
-  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1)
-  ; HSA-VI:   G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
+  ; HSA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1)
+  ; HSA-VI-NEXT:   G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: i1_arg_zext_i64
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1)
-  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %ext = zext i1 %x to i64
   store i64 %ext, i64 addrspace(1)* %out, align 8
   ret void
@@ -1009,30 +1075,32 @@ define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwi
 define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
   ; HSA-VI-LABEL: name: i1_arg_sext_i32
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
-  ; HSA-VI:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1)
-  ; HSA-VI:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
+  ; HSA-VI-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1)
+  ; HSA-VI-NEXT:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: i1_arg_sext_i32
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1)
-  ; LEGACY-MESA-VI:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %ext = sext i1 %x to i32
   store i32 %ext, i32addrspace(1)* %out, align 4
   ret void
@@ -1041,30 +1109,32 @@ define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwi
 define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
   ; HSA-VI-LABEL: name: i1_arg_sext_i64
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
-  ; HSA-VI:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1)
-  ; HSA-VI:   G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
+  ; HSA-VI-NEXT:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1)
+  ; HSA-VI-NEXT:   G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: i1_arg_sext_i64
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1)
-  ; LEGACY-MESA-VI:   G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %ext = sext i1 %x to i64
   store i64 %ext, i64 addrspace(1)* %out, align 8
   ret void
@@ -1075,24 +1145,26 @@ define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwi
 define amdgpu_kernel void @empty_struct_arg({} %arg0, i32 %arg1) nounwind {
   ; HSA-VI-LABEL: name: empty_struct_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
-  ; HSA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; HSA-VI:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; HSA-VI-NEXT:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: empty_struct_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
-  ; LEGACY-MESA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   store i32 %arg1, i32 addrspace(1)* undef
   ret void
 }
@@ -1100,24 +1172,26 @@ define amdgpu_kernel void @empty_struct_arg({} %arg0, i32 %arg1) nounwind {
 define amdgpu_kernel void @empty_array_arg([0 x i8] %arg0, i32 %arg1) nounwind {
   ; HSA-VI-LABEL: name: empty_array_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
-  ; HSA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; HSA-VI:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; HSA-VI-NEXT:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: empty_array_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
-  ; LEGACY-MESA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   store i32 %arg1, i32 addrspace(1)* undef
   ret void
 }
@@ -1133,60 +1207,62 @@ define amdgpu_kernel void @empty_array_arg([0 x i8] %arg0, i32 %arg1) nounwind {
 define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad, {i32, i64} %arg1) {
   ; HSA-VI-LABEL: name: struct_argument_alignment
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
-  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4)
-  ; HSA-VI:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
-  ; HSA-VI:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-  ; HSA-VI:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
-  ; HSA-VI:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-  ; HSA-VI:   [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-  ; HSA-VI:   [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
-  ; HSA-VI:   [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-  ; HSA-VI:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
-  ; HSA-VI:   [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
-  ; HSA-VI:   G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
-  ; HSA-VI:   G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
-  ; HSA-VI:   G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
-  ; HSA-VI:   G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
-  ; HSA-VI:   G_STORE [[LOAD4]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
+  ; HSA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; HSA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; HSA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+  ; HSA-VI-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+  ; HSA-VI-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
+  ; HSA-VI-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+  ; HSA-VI-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+  ; HSA-VI-NEXT:   [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
+  ; HSA-VI-NEXT:   [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
+  ; HSA-VI-NEXT:   [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD4]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: struct_argument_alignment
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
-  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60
-  ; LEGACY-MESA-VI:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
-  ; LEGACY-MESA-VI:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
-  ; LEGACY-MESA-VI:   [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-  ; LEGACY-MESA-VI:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
-  ; LEGACY-MESA-VI:   [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD4]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+  ; LEGACY-MESA-VI-NEXT:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
+  ; LEGACY-MESA-VI-NEXT:   [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD4]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %val0 = extractvalue {i32, i64} %arg0, 0
   %val1 = extractvalue {i32, i64} %arg0, 1
   %val2 = extractvalue {i32, i64} %arg1, 0
@@ -1202,62 +1278,64 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad,
 define amdgpu_kernel void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %arg0, i8 %pad, {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1) {
   ; HSA-VI-LABEL: name: pointer_in_struct_argument
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
-  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4)
-  ; HSA-VI:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
-  ; HSA-VI:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-  ; HSA-VI:   [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
-  ; HSA-VI:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-  ; HSA-VI:   [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-  ; HSA-VI:   [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
-  ; HSA-VI:   [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-  ; HSA-VI:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
-  ; HSA-VI:   [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
-  ; HSA-VI:   [[COPY3:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
-  ; HSA-VI:   G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
-  ; HSA-VI:   G_STORE [[LOAD1]](p1), [[COPY1]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1)
-  ; HSA-VI:   G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
-  ; HSA-VI:   G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
-  ; HSA-VI:   G_STORE [[LOAD4]](p1234), [[COPY3]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
+  ; HSA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; HSA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; HSA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+  ; HSA-VI-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+  ; HSA-VI-NEXT:   [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
+  ; HSA-VI-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+  ; HSA-VI-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+  ; HSA-VI-NEXT:   [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
+  ; HSA-VI-NEXT:   [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
+  ; HSA-VI-NEXT:   [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
+  ; HSA-VI-NEXT:   [[COPY3:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](p1), [[COPY1]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD4]](p1234), [[COPY3]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: pointer_in_struct_argument
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
-  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60
-  ; LEGACY-MESA-VI:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
-  ; LEGACY-MESA-VI:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
-  ; LEGACY-MESA-VI:   [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-  ; LEGACY-MESA-VI:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
-  ; LEGACY-MESA-VI:   [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
-  ; LEGACY-MESA-VI:   [[COPY3:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](p1), [[COPY1]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD4]](p1234), [[COPY3]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+  ; LEGACY-MESA-VI-NEXT:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
+  ; LEGACY-MESA-VI-NEXT:   [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
+  ; LEGACY-MESA-VI-NEXT:   [[COPY3:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](p1), [[COPY1]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD4]](p1234), [[COPY3]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %val0 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 0
   %val1 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 1
   %val2 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 0
@@ -1275,50 +1353,52 @@ define amdgpu_kernel void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addr
 define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, i8, <{i32, i64}> %arg1) {
   ; HSA-VI-LABEL: name: packed_struct_argument_alignment
   ; HSA-VI: bb.1 (%ir-block.1):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
-  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 13
-  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4)
-  ; HSA-VI:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 17
-  ; HSA-VI:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-  ; HSA-VI:   [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4)
-  ; HSA-VI:   [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-  ; HSA-VI:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C4]](p1)
-  ; HSA-VI:   G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
-  ; HSA-VI:   G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
-  ; HSA-VI:   G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
-  ; HSA-VI:   G_STORE [[LOAD3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+  ; HSA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 13
+  ; HSA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; HSA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4)
+  ; HSA-VI-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 17
+  ; HSA-VI-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+  ; HSA-VI-NEXT:   [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4)
+  ; HSA-VI-NEXT:   [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C4]](p1)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: packed_struct_argument_alignment
   ; LEGACY-MESA-VI: bb.1 (%ir-block.1):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 49
-  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 53
-  ; LEGACY-MESA-VI:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-  ; LEGACY-MESA-VI:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C4]](p1)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 49
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 53
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+  ; LEGACY-MESA-VI-NEXT:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C4]](p1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %val0 = extractvalue <{i32, i64}> %arg0, 0
   %val1 = extractvalue <{i32, i64}> %arg0, 1
   %val2 = extractvalue <{i32, i64}> %arg1, 0
@@ -1333,14 +1413,16 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0,
 define amdgpu_kernel void @unused_i32_arg(i32 addrspace(1)* nocapture %out, i32 %unused, i32 %in) nounwind {
   ; HSA-VI-LABEL: name: unused_i32_arg
   ; HSA-VI: bb.1.entry:
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: unused_i32_arg
   ; LEGACY-MESA-VI: bb.1.entry:
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
 entry:
   ret void
 }
@@ -1349,30 +1431,32 @@ entry:
 define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %out, i8 addrspace(4)* byref(i8) %in.byref) {
   ; HSA-VI-LABEL: name: byref_constant_i8_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s8) from %ir.in.byref, addrspace 4)
-  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
-  ; HSA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s8) from %ir.in.byref, addrspace 4)
+  ; HSA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
+  ; HSA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: byref_constant_i8_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s8) from %ir.in.byref, addrspace 4)
-  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
-  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s8) from %ir.in.byref, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %in = load i8, i8 addrspace(4)* %in.byref
   %ext = zext i8 %in to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
@@ -1382,30 +1466,32 @@ define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %ou
 define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %out, i16 addrspace(4)* byref(i16) align 2 %in.byref) {
   ; HSA-VI-LABEL: name: byref_constant_i16_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s16) from %ir.in.byref, addrspace 4)
-  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
-  ; HSA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s16) from %ir.in.byref, addrspace 4)
+  ; HSA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
+  ; HSA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: byref_constant_i16_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s16) from %ir.in.byref, addrspace 4)
-  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
-  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s16) from %ir.in.byref, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %in = load i16, i16 addrspace(4)* %in.byref
   %ext = zext i16 %in to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
@@ -1415,36 +1501,38 @@ define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %o
 define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align 4 %in.byref, i32 %after.offset) {
   ; HSA-VI-LABEL: name: byref_constant_i32_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
-  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; HSA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
+  ; HSA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
-  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
-  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %in = load i32, i32 addrspace(4)* %in.byref
   store volatile i32 %in, i32 addrspace(1)* %out, align 4
   store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4
@@ -1454,36 +1542,38 @@ define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %o
 define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> addrspace(4)* byref(<4 x i32>) align(16) %in.byref, i32 %after.offset) {
   ; HSA-VI-LABEL: name: byref_constant_v4i32_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
-  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1)
-  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.cast, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+  ; HSA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.cast, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: byref_constant_v4i32_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
-  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
-  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.cast, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.cast, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %in = load <4 x i32>, <4 x i32> addrspace(4)* %in.byref
   store volatile <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
   %out.cast = bitcast <4 x i32> addrspace(1)* %out to i32 addrspace(1)*
@@ -1494,36 +1584,38 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* noca
 define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align(256) %in.byref, i32 %after.offset) {
   ; HSA-VI-LABEL: name: byref_align_constant_i32_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 260
-  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
-  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 260
+  ; HSA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
+  ; HSA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: byref_align_constant_i32_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 292
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 296
-  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
-  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 292
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 296
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %in = load i32, i32 addrspace(4)* %in.byref
   store volatile i32 %in, i32 addrspace(1)* %out, align 4
   store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4
@@ -1533,36 +1625,38 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapt
 define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace(1)* nocapture %out, i8, <16 x i32> addrspace(4)* byref(<16 x i32>) align(64) %in.byref, i32 %after.offset) {
   ; HSA-VI-LABEL: name: byref_natural_align_constant_v16i32_arg
   ; HSA-VI: bb.1 (%ir-block.1):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
-  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
-  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.cast.out, align 4, addrspace 1)
-  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
+  ; HSA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.cast.out, align 4, addrspace 1)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: byref_natural_align_constant_v16i32_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.1):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 164
-  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
-  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.cast.out, align 4, addrspace 1)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 164
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.cast.out, align 4, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %in = load <16 x i32>, <16 x i32> addrspace(4)* %in.byref
   %cast.out = bitcast i32 addrspace(1)* %out to <16 x i32> addrspace(1)*
   store volatile <16 x i32> %in, <16 x i32> addrspace(1)* %cast.out, align 4
@@ -1574,32 +1668,34 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace
 define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* byref(i32) align(4) %in.byref) {
   ; HSA-VI-LABEL: name: byref_global_i32_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
-  ; HSA-VI:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[ADDRSPACE_CAST]](p1)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (dereferenceable load (s32) from %ir.1, addrspace 1)
-  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
+  ; HSA-VI-NEXT:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[ADDRSPACE_CAST]](p1)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (dereferenceable load (s32) from %ir.1, addrspace 1)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: byref_global_i32_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
-  ; LEGACY-MESA-VI:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[ADDRSPACE_CAST]](p1)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (dereferenceable load (s32) from %ir.1, addrspace 1)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
+  ; LEGACY-MESA-VI-NEXT:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[ADDRSPACE_CAST]](p1)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (dereferenceable load (s32) from %ir.1, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %in = load i32, i32 addrspace(1)* %in.byref
   store i32 %in, i32 addrspace(1)* %out, align 4
   ret void
@@ -1608,30 +1704,32 @@ define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out
 define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out, i32* byref(i32) align(4) %in.byref) {
   ; HSA-VI-LABEL: name: byref_flat_i32_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref)
-  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: byref_flat_i32_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %in = load i32, i32* %in.byref
   store i32 %in, i32 addrspace(1)* %out, align 4
   ret void
@@ -1640,30 +1738,32 @@ define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out,
 define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(6)* byref(i32) align(4) %in.byref) {
   ; HSA-VI-LABEL: name: byref_constant_32bit_i32_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 6)
-  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 6)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: byref_constant_32bit_i32_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 6)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 6)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %in = load i32, i32 addrspace(6)* %in.byref
   store i32 %in, i32 addrspace(1)* %out, align 4
   ret void
@@ -1672,30 +1772,32 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapt
 define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(999)* byref(i32) align(4) %in.byref) {
   ; HSA-VI-LABEL: name: byref_unknown_as_i32_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999)
-  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: byref_unknown_as_i32_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %in = load i32, i32 addrspace(999)* %in.byref
   store i32 %in, i32 addrspace(1)* %out, align 4
   ret void
@@ -1705,30 +1807,32 @@ define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture
 define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(3)* byref(i32) align(4) %in.byref) {
   ; HSA-VI-LABEL: name: byref_local_i32_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3)
-  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: byref_local_i32_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %in = load i32, i32 addrspace(3)* %in.byref
   store i32 %in, i32 addrspace(1)* %out, align 4
   ret void
@@ -1737,44 +1841,46 @@ define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out,
 define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align(4) %in0.byref, i32 addrspace(4)* byref(i32) align(4) %in1.byref, i32 %after.offset) {
   ; HSA-VI-LABEL: name: multi_byref_constant_i32_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-  ; HSA-VI:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-  ; HSA-VI:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
-  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in0.byref, addrspace 4)
-  ; HSA-VI:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load (s32) from %ir.in1.byref, addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; HSA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; HSA-VI-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; HSA-VI-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in0.byref, addrspace 4)
+  ; HSA-VI-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load (s32) from %ir.in1.byref, addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: multi_byref_constant_i32_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
-  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
-  ; LEGACY-MESA-VI:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
-  ; LEGACY-MESA-VI:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
-  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in0.byref, addrspace 4)
-  ; LEGACY-MESA-VI:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load (s32) from %ir.in1.byref, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in0.byref, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load (s32) from %ir.in1.byref, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %in0 = load i32, i32 addrspace(4)* %in0.byref
   %in1 = load i32, i32 addrspace(4)* %in1.byref
   store volatile i32 %in0, i32 addrspace(1)* %out, align 4
@@ -1786,24 +1892,26 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapt
 define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byref(i32) align(4) %in.byref) {
   ; HSA-VI-LABEL: name: byref_constant_i32_arg_offset0
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
-  ; HSA-VI:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg_offset0
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   %in = load i32, i32 addrspace(4)* %in.byref
   store i32 %in, i32 addrspace(1)* undef, align 4
   ret void
@@ -1812,24 +1920,26 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byre
 define amdgpu_kernel void @p3i8_arg(i8 addrspace(3)* %arg) nounwind {
   ; HSA-VI-LABEL: name: p3i8_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p3), align 16, addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
-  ; HSA-VI:   G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p3), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
+  ; HSA-VI-NEXT:   G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: p3i8_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p3), addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
-  ; LEGACY-MESA-VI:   G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p3), addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   store i8 9, i8 addrspace(3)* %arg, align 4
   ret void
 }
@@ -1837,20 +1947,22 @@ define amdgpu_kernel void @p3i8_arg(i8 addrspace(3)* %arg) nounwind {
 define amdgpu_kernel void @p1i8_arg(i8 addrspace(1)* %arg) nounwind {
   ; HSA-VI-LABEL: name: p1i8_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0
-  ; HSA-VI:   G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `i8 addrspace(3)* null`, addrspace 3)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0
+  ; HSA-VI-NEXT:   G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `i8 addrspace(3)* null`, addrspace 3)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: p1i8_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0
-  ; LEGACY-MESA-VI:   G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `i8 addrspace(3)* null`, addrspace 3)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `i8 addrspace(3)* null`, addrspace 3)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   store i8 9, i8 addrspace(3)* null
   ret void
 }
@@ -1858,24 +1970,26 @@ define amdgpu_kernel void @p1i8_arg(i8 addrspace(1)* %arg) nounwind {
 define amdgpu_kernel void @v2p1i8_arg(<2 x i8 addrspace(1)*> %arg) nounwind {
   ; HSA-VI-LABEL: name: v2p1i8_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), addrspace 4)
-  ; HSA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; HSA-VI:   G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), addrspace 4)
+  ; HSA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; HSA-VI-NEXT:   G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v2p1i8_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   store <2 x i8 addrspace(1)*> %arg, <2 x i8 addrspace(1)*> addrspace(1)* undef
   ret void
 }
@@ -1883,24 +1997,26 @@ define amdgpu_kernel void @v2p1i8_arg(<2 x i8 addrspace(1)*> %arg) nounwind {
 define amdgpu_kernel void @v2p3i8_arg(<2 x i8 addrspace(3)*> %arg) nounwind {
   ; HSA-VI-LABEL: name: v2p3i8_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p3>), align 16, addrspace 4)
-  ; HSA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; HSA-VI:   G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p3>), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; HSA-VI-NEXT:   G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v2p3i8_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p3>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p3>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   store <2 x i8 addrspace(3)*> %arg, <2 x i8 addrspace(3)*> addrspace(1)* undef
   ret void
 }
@@ -1908,36 +2024,38 @@ define amdgpu_kernel void @v2p3i8_arg(<2 x i8 addrspace(3)*> %arg) nounwind {
 define amdgpu_kernel void @v2p1i8_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg) nounwind {
   ; HSA-VI-LABEL: name: v2p1i8_in_struct_arg
   ; HSA-VI: bb.1 (%ir-block.0):
-  ; HSA-VI:   liveins: $sgpr4_sgpr5
-  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), addrspace 4)
-  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 16, addrspace 4)
-  ; HSA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; HSA-VI:   G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1)
-  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64)
-  ; HSA-VI:   G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1)
-  ; HSA-VI:   S_ENDPGM 0
+  ; HSA-VI-NEXT:   liveins: $sgpr4_sgpr5
+  ; HSA-VI-NEXT: {{  $}}
+  ; HSA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; HSA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; HSA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; HSA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), addrspace 4)
+  ; HSA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; HSA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; HSA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 16, addrspace 4)
+  ; HSA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; HSA-VI-NEXT:   G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1)
+  ; HSA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; HSA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64)
+  ; HSA-VI-NEXT:   G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1)
+  ; HSA-VI-NEXT:   S_ENDPGM 0
   ; LEGACY-MESA-VI-LABEL: name: v2p1i8_in_struct_arg
   ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
-  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
-  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
-  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
-  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
-  ; LEGACY-MESA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1)
-  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64)
-  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1)
-  ; LEGACY-MESA-VI:   S_ENDPGM 0
+  ; LEGACY-MESA-VI-NEXT:   liveins: $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT: {{  $}}
+  ; LEGACY-MESA-VI-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+  ; LEGACY-MESA-VI-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; LEGACY-MESA-VI-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
+  ; LEGACY-MESA-VI-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; LEGACY-MESA-VI-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64)
+  ; LEGACY-MESA-VI-NEXT:   G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1)
+  ; LEGACY-MESA-VI-NEXT:   S_ENDPGM 0
   store { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg, { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
index e7ef3bd455e4..2da014d79a0f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
@@ -5,12 +5,13 @@
 define amdgpu_ps void @disabled_input(float inreg %arg0, float %psinput0, float %psinput1) #1 {
   ; CHECK-LABEL: name: disabled_input
   ; CHECK: bb.1.main_body:
-  ; CHECK:   liveins: $sgpr2, $vgpr0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr2, $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0
+  ; CHECK-NEXT:   S_ENDPGM 0
 main_body:
   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0
   ret void
@@ -19,13 +20,14 @@ main_body:
 define amdgpu_ps void @disabled_input_struct(float inreg %arg0, { float, float } %psinput0, float %psinput1) #1 {
   ; CHECK-LABEL: name: disabled_input_struct
   ; CHECK: bb.1.main_body:
-  ; CHECK:   liveins: $sgpr2, $vgpr0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
-  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr2, $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+  ; CHECK-NEXT:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0
+  ; CHECK-NEXT:   S_ENDPGM 0
 main_body:
   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0
   ret void
@@ -34,10 +36,11 @@ main_body:
 define amdgpu_ps float @vgpr_return(i32 %vgpr) {
   ; CHECK-LABEL: name: vgpr_return
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   $vgpr0 = COPY [[COPY]](s32)
-  ; CHECK:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   $vgpr0 = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
   %cast = bitcast i32 %vgpr to float
   ret float %cast
 }
@@ -45,58 +48,62 @@ define amdgpu_ps float @vgpr_return(i32 %vgpr) {
 define amdgpu_ps i32 @sgpr_return_i32(i32 %vgpr) {
   ; CHECK-LABEL: name: sgpr_return_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
-  ; CHECK:   $sgpr0 = COPY [[INT]](s32)
-  ; CHECK:   SI_RETURN_TO_EPILOG implicit $sgpr0
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
+  ; CHECK-NEXT:   $sgpr0 = COPY [[INT]](s32)
+  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
   ret i32 %vgpr
 }
 
 define amdgpu_ps i64 @sgpr_return_i64(i64 %vgpr) {
   ; CHECK-LABEL: name: sgpr_return_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
-  ; CHECK:   [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
-  ; CHECK:   $sgpr0 = COPY [[INT]](s32)
-  ; CHECK:   [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
-  ; CHECK:   $sgpr1 = COPY [[INT1]](s32)
-  ; CHECK:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+  ; CHECK-NEXT:   $sgpr0 = COPY [[INT]](s32)
+  ; CHECK-NEXT:   [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+  ; CHECK-NEXT:   $sgpr1 = COPY [[INT1]](s32)
+  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
   ret i64 %vgpr
 }
 
 define amdgpu_ps <2 x i32> @sgpr_return_v2i32(<2 x i32> %vgpr) {
   ; CHECK-LABEL: name: sgpr_return_v2i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>)
-  ; CHECK:   [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
-  ; CHECK:   $sgpr0 = COPY [[INT]](s32)
-  ; CHECK:   [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
-  ; CHECK:   $sgpr1 = COPY [[INT1]](s32)
-  ; CHECK:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>)
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+  ; CHECK-NEXT:   $sgpr0 = COPY [[INT]](s32)
+  ; CHECK-NEXT:   [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+  ; CHECK-NEXT:   $sgpr1 = COPY [[INT1]](s32)
+  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
   ret <2 x i32> %vgpr
 }
 
 define amdgpu_ps { i32, i32 } @sgpr_struct_return_i32_i32(i32 %vgpr0, i32 %vgpr1) {
   ; CHECK-LABEL: name: sgpr_struct_return_i32_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK:   [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
-  ; CHECK:   $sgpr0 = COPY [[INT]](s32)
-  ; CHECK:   [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
-  ; CHECK:   $sgpr1 = COPY [[INT1]](s32)
-  ; CHECK:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
+  ; CHECK-NEXT:   $sgpr0 = COPY [[INT]](s32)
+  ; CHECK-NEXT:   [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
+  ; CHECK-NEXT:   $sgpr1 = COPY [[INT1]](s32)
+  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
   %insertvalue0 = insertvalue { i32, i32 } undef, i32 %vgpr0, 0
   %value = insertvalue { i32, i32 } %insertvalue0, i32 %vgpr1, 1
   ret { i32, i32 } %value

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
index f5be7ef76f4a..6c867f8ddd22 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
@@ -4,11 +4,12 @@
 define amdgpu_vs void @test_f32_inreg(float inreg %arg0) {
   ; CHECK-LABEL: name: test_f32_inreg
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr2
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; CHECK:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
   ret void
 }
@@ -16,11 +17,12 @@ define amdgpu_vs void @test_f32_inreg(float inreg %arg0) {
 define amdgpu_vs void @test_f32(float %arg0) {
   ; CHECK-LABEL: name: test_f32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
   ret void
 }
@@ -28,12 +30,13 @@ define amdgpu_vs void @test_f32(float %arg0) {
 define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(4)* inreg %arg0) {
   ; CHECK-LABEL: name: test_ptr2_inreg
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr2, $sgpr3
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; CHECK:   [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile load (s32) from %ir.arg0, addrspace 4)
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile load (s32) from %ir.arg0, addrspace 4)
+  ; CHECK-NEXT:   S_ENDPGM 0
   %tmp0 = load volatile i32, i32 addrspace(4)* %arg0
   ret void
 }
@@ -41,15 +44,16 @@ define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(4)* inreg %arg0) {
 define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)* inreg %arg1) {
   ; CHECK-LABEL: name: test_sgpr_alignment0
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; CHECK:   [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile load (s32) from %ir.arg1, addrspace 4)
-  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile load (s32) from %ir.arg1, addrspace 4)
+  ; CHECK-NEXT:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0
+  ; CHECK-NEXT:   S_ENDPGM 0
   %tmp0 = load volatile i32, i32 addrspace(4)* %arg1
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
   ret void
@@ -58,13 +62,14 @@ define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)*
 define amdgpu_vs void @test_order(float inreg %arg0, float inreg %arg1, float %arg2, float %arg3) {
   ; CHECK-LABEL: name: test_order
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY2]](s32), [[COPY]](s32), [[COPY3]](s32), [[COPY1]](s32), 0, 0
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY2]](s32), [[COPY]](s32), [[COPY3]](s32), [[COPY1]](s32), 0, 0
+  ; CHECK-NEXT:   S_ENDPGM 0
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg2, float %arg0, float %arg3, float %arg1, i1 false, i1 false) #0
   ret void
 }
@@ -72,15 +77,16 @@ define amdgpu_vs void @test_order(float inreg %arg0, float inreg %arg1, float %a
 define amdgpu_vs <{ i32, i32 }> @ret_struct(i32 inreg %arg0, i32 inreg %arg1) {
   ; CHECK-LABEL: name: ret_struct
   ; CHECK: bb.1.main_body:
-  ; CHECK:   liveins: $sgpr2, $sgpr3
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; CHECK:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK:   [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
-  ; CHECK:   $sgpr0 = COPY [[INT]](s32)
-  ; CHECK:   [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
-  ; CHECK:   $sgpr1 = COPY [[INT1]](s32)
-  ; CHECK:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
+  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
+  ; CHECK-NEXT:   $sgpr0 = COPY [[INT]](s32)
+  ; CHECK-NEXT:   [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
+  ; CHECK-NEXT:   $sgpr1 = COPY [[INT1]](s32)
+  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
 main_body:
   %tmp0 = insertvalue <{ i32, i32 }> undef, i32 %arg0, 0
   %tmp1 = insertvalue <{ i32, i32 }> %tmp0, i32 %arg1, 1
@@ -90,10 +96,10 @@ main_body:
 define amdgpu_vs i32 @non_void_ret() {
   ; CHECK-LABEL: name: non_void_ret
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32)
-  ; CHECK:   $sgpr0 = COPY [[INT]](s32)
-  ; CHECK:   SI_RETURN_TO_EPILOG implicit $sgpr0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32)
+  ; CHECK-NEXT:   $sgpr0 = COPY [[INT]](s32)
+  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
   ret i32 0
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll
index 0c15a9ed2675..78ecae87d06f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll
@@ -6,20 +6,21 @@
 define i32 @test() {
   ; CHECK-LABEL: name: test
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
-  ; CHECK:   [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[C]](s32)
-  ; CHECK:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var
-  ; CHECK:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[INTTOPTR]](p0), [[GV]]
-  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY [[ZEXT]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32)
-  ; CHECK:   $vgpr0 = COPY [[COPY4]](s32)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0
+  ; CHECK-NEXT:   liveins: $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+  ; CHECK-NEXT:   [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[C]](s32)
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var
+  ; CHECK-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[INTTOPTR]](p0), [[GV]]
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY [[ZEXT]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[COPY4]](s32)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0
   ret i32 bitcast (<1 x i32> <i32 extractelement (<1 x i32> bitcast (i32 zext (i1 icmp eq (i32* @var, i32* inttoptr (i32 -1 to i32*)) to i32) to <1 x i32>), i64 0)> to i32)
 }
 
@@ -30,15 +31,15 @@ define i32 @test() {
 define amdgpu_kernel void @constantexpr_select_0() {
   ; CHECK-LABEL: name: constantexpr_select_0
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @gint
-  ; CHECK:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-  ; CHECK:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[GV]](p1), [[C]]
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]]
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @gint
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[GV]](p1), [[C]]
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   S_ENDPGM 0
   store i32 select (i1 icmp eq (i8 addrspace(1)* @gint, i8 addrspace(1)* null), i32 1, i32 0), i32 addrspace(1)* undef, align 4
   ret void
 }
@@ -46,16 +47,16 @@ define amdgpu_kernel void @constantexpr_select_0() {
 define amdgpu_kernel void @constantexpr_select_1() {
   ; CHECK-LABEL: name: constantexpr_select_1
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1024
-  ; CHECK:   [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[C]](s64)
-  ; CHECK:   [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @gint
-  ; CHECK:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[INTTOPTR]](p1), [[GV]]
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]]
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1024
+  ; CHECK-NEXT:   [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[C]](s64)
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @gint
+  ; CHECK-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[INTTOPTR]](p1), [[GV]]
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]]
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   S_ENDPGM 0
   store i32 select (i1 icmp eq (i8 addrspace(1)* @gint, i8 addrspace(1)* inttoptr (i64 1024 to i8 addrspace(1)*)), i32 1, i32 0), i32 addrspace(1)* undef, align 4
   ret void
 }
@@ -65,20 +66,21 @@ define amdgpu_kernel void @constantexpr_select_1() {
 define i32 @test_fcmp_constexpr() {
   ; CHECK-LABEL: name: test_fcmp_constexpr
   ; CHECK: bb.1.entry:
-  ; CHECK:   liveins: $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[C]](s64)
-  ; CHECK:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var
-  ; CHECK:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[PTR_ADD]](p0), [[GV1]]
-  ; CHECK:   [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[ICMP]](s1)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
-  ; CHECK:   [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UITOFP]](s32), [[C1]]
-  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FCMP]](s1)
-  ; CHECK:   $vgpr0 = COPY [[ZEXT]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0
+  ; CHECK-NEXT:   liveins: $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[C]](s64)
+  ; CHECK-NEXT:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var
+  ; CHECK-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[PTR_ADD]](p0), [[GV1]]
+  ; CHECK-NEXT:   [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[ICMP]](s1)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+  ; CHECK-NEXT:   [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UITOFP]](s32), [[C1]]
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FCMP]](s1)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ZEXT]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0
 entry:
   ret i32 zext (i1 fcmp oeq (float uitofp (i1 icmp eq (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @a, i64 0, i64 1), i32* @var) to float), float 0.000000e+00) to i32)
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fence.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fence.ll
index 684d566592aa..eebbe20abd04 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fence.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fence.ll
@@ -4,8 +4,8 @@
 define amdgpu_kernel void @system_one_as_acquire() {
   ; CHECK-LABEL: name: system_one_as_acquire
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 4, 2
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 4, 2
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("one-as") acquire
   ret void
 }
@@ -13,8 +13,8 @@ define amdgpu_kernel void @system_one_as_acquire() {
 define amdgpu_kernel void @system_one_as_release() {
   ; CHECK-LABEL: name: system_one_as_release
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 5, 2
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 5, 2
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("one-as") release
   ret void
 }
@@ -22,8 +22,8 @@ define amdgpu_kernel void @system_one_as_release() {
 define amdgpu_kernel void @system_one_as_acq_rel() {
   ; CHECK-LABEL: name: system_one_as_acq_rel
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 6, 2
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 6, 2
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("one-as") acq_rel
   ret void
 }
@@ -31,8 +31,8 @@ define amdgpu_kernel void @system_one_as_acq_rel() {
 define amdgpu_kernel void @system_one_as_seq_cst() {
   ; CHECK-LABEL: name: system_one_as_seq_cst
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 7, 2
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 7, 2
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("one-as") seq_cst
   ret void
 }
@@ -40,8 +40,8 @@ define amdgpu_kernel void @system_one_as_seq_cst() {
 define amdgpu_kernel void @singlethread_one_as_acquire() {
   ; CHECK-LABEL: name: singlethread_one_as_acquire
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 4, 3
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 4, 3
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("singlethread-one-as") acquire
   ret void
 }
@@ -49,8 +49,8 @@ define amdgpu_kernel void @singlethread_one_as_acquire() {
 define amdgpu_kernel void @singlethread_one_as_release() {
   ; CHECK-LABEL: name: singlethread_one_as_release
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 5, 3
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 5, 3
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("singlethread-one-as") release
   ret void
 }
@@ -58,8 +58,8 @@ define amdgpu_kernel void @singlethread_one_as_release() {
 define amdgpu_kernel void @singlethread_one_as_acq_rel() {
   ; CHECK-LABEL: name: singlethread_one_as_acq_rel
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 6, 3
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 6, 3
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("singlethread-one-as") acq_rel
   ret void
 }
@@ -67,8 +67,8 @@ define amdgpu_kernel void @singlethread_one_as_acq_rel() {
 define amdgpu_kernel void @singlethread_one_as_seq_cst() {
   ; CHECK-LABEL: name: singlethread_one_as_seq_cst
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 7, 3
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 7, 3
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("singlethread-one-as") seq_cst
   ret void
 }
@@ -76,8 +76,8 @@ define amdgpu_kernel void @singlethread_one_as_seq_cst() {
 define amdgpu_kernel void @agent_one_as_acquire() {
   ; CHECK-LABEL: name: agent_one_as_acquire
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 4, 4
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 4, 4
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("agent-one-as") acquire
   ret void
 }
@@ -85,8 +85,8 @@ define amdgpu_kernel void @agent_one_as_acquire() {
 define amdgpu_kernel void @agent_one_as_release() {
   ; CHECK-LABEL: name: agent_one_as_release
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 5, 4
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 5, 4
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("agent-one-as") release
   ret void
 }
@@ -94,8 +94,8 @@ define amdgpu_kernel void @agent_one_as_release() {
 define amdgpu_kernel void @agent_one_as_acq_rel() {
   ; CHECK-LABEL: name: agent_one_as_acq_rel
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 6, 4
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 6, 4
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("agent-one-as") acq_rel
   ret void
 }
@@ -103,8 +103,8 @@ define amdgpu_kernel void @agent_one_as_acq_rel() {
 define amdgpu_kernel void @agent_one_as_seq_cst() {
   ; CHECK-LABEL: name: agent_one_as_seq_cst
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 7, 4
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 7, 4
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("agent-one-as") seq_cst
   ret void
 }
@@ -112,8 +112,8 @@ define amdgpu_kernel void @agent_one_as_seq_cst() {
 define amdgpu_kernel void @workgroup_one_as_acquire() {
   ; CHECK-LABEL: name: workgroup_one_as_acquire
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 4, 5
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 4, 5
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("workgroup-one-as") acquire
   ret void
 }
@@ -121,8 +121,8 @@ define amdgpu_kernel void @workgroup_one_as_acquire() {
 define amdgpu_kernel void @workgroup_one_as_release() {
   ; CHECK-LABEL: name: workgroup_one_as_release
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 5, 5
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 5, 5
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("workgroup-one-as") release
   ret void
 }
@@ -130,8 +130,8 @@ define amdgpu_kernel void @workgroup_one_as_release() {
 define amdgpu_kernel void @workgroup_one_as_acq_rel() {
   ; CHECK-LABEL: name: workgroup_one_as_acq_rel
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 6, 5
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 6, 5
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("workgroup-one-as") acq_rel
   ret void
 }
@@ -139,8 +139,8 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel() {
 define amdgpu_kernel void @workgroup_one_as_seq_cst() {
   ; CHECK-LABEL: name: workgroup_one_as_seq_cst
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 7, 5
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 7, 5
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("workgroup-one-as") seq_cst
   ret void
 }
@@ -148,8 +148,8 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst() {
 define amdgpu_kernel void @wavefront_one_as_acquire() {
   ; CHECK-LABEL: name: wavefront_one_as_acquire
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 4, 6
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 4, 6
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("wavefront-one-as") acquire
   ret void
 }
@@ -157,8 +157,8 @@ define amdgpu_kernel void @wavefront_one_as_acquire() {
 define amdgpu_kernel void @wavefront_one_as_release() {
   ; CHECK-LABEL: name: wavefront_one_as_release
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 5, 6
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 5, 6
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("wavefront-one-as") release
   ret void
 }
@@ -166,8 +166,8 @@ define amdgpu_kernel void @wavefront_one_as_release() {
 define amdgpu_kernel void @wavefront_one_as_acq_rel() {
   ; CHECK-LABEL: name: wavefront_one_as_acq_rel
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 6, 6
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 6, 6
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("wavefront-one-as") acq_rel
   ret void
 }
@@ -175,8 +175,8 @@ define amdgpu_kernel void @wavefront_one_as_acq_rel() {
 define amdgpu_kernel void @wavefront_one_as_seq_cst() {
   ; CHECK-LABEL: name: wavefront_one_as_seq_cst
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 7, 6
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 7, 6
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("wavefront-one-as") seq_cst
   ret void
 }
@@ -184,7 +184,7 @@ define amdgpu_kernel void @wavefront_one_as_seq_cst() {
 define amdgpu_kernel void @system_acquire() {
   ; CHECK-LABEL: name: system_acquire
   ; CHECK: bb.1.entry:
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   S_ENDPGM 0
 entry:
   ret void
 }
@@ -192,8 +192,8 @@ entry:
 define amdgpu_kernel void @system_release() {
   ; CHECK-LABEL: name: system_release
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 5, 1
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 5, 1
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence release
   ret void
 }
@@ -201,8 +201,8 @@ define amdgpu_kernel void @system_release() {
 define amdgpu_kernel void @system_acq_rel() {
   ; CHECK-LABEL: name: system_acq_rel
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 6, 1
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 6, 1
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence acq_rel
   ret void
 }
@@ -210,8 +210,8 @@ define amdgpu_kernel void @system_acq_rel() {
 define amdgpu_kernel void @system_seq_cst() {
   ; CHECK-LABEL: name: system_seq_cst
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 7, 1
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 7, 1
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence seq_cst
   ret void
 }
@@ -219,8 +219,8 @@ define amdgpu_kernel void @system_seq_cst() {
 define amdgpu_kernel void @singlethread_acquire() {
   ; CHECK-LABEL: name: singlethread_acquire
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 4, 0
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 4, 0
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("singlethread") acquire
   ret void
 }
@@ -228,8 +228,8 @@ define amdgpu_kernel void @singlethread_acquire() {
 define amdgpu_kernel void @singlethread_release() {
   ; CHECK-LABEL: name: singlethread_release
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 5, 0
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 5, 0
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("singlethread") release
   ret void
 }
@@ -237,8 +237,8 @@ define amdgpu_kernel void @singlethread_release() {
 define amdgpu_kernel void @singlethread_acq_rel() {
   ; CHECK-LABEL: name: singlethread_acq_rel
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 6, 0
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 6, 0
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("singlethread") acq_rel
   ret void
 }
@@ -246,8 +246,8 @@ define amdgpu_kernel void @singlethread_acq_rel() {
 define amdgpu_kernel void @singlethread_seq_cst() {
   ; CHECK-LABEL: name: singlethread_seq_cst
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 7, 0
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 7, 0
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("singlethread") seq_cst
   ret void
 }
@@ -255,8 +255,8 @@ define amdgpu_kernel void @singlethread_seq_cst() {
 define amdgpu_kernel void @agent_acquire() {
   ; CHECK-LABEL: name: agent_acquire
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 4, 7
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 4, 7
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("agent") acquire
   ret void
 }
@@ -264,8 +264,8 @@ define amdgpu_kernel void @agent_acquire() {
 define amdgpu_kernel void @agent_release() {
   ; CHECK-LABEL: name: agent_release
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 5, 7
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 5, 7
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("agent") release
   ret void
 }
@@ -273,8 +273,8 @@ define amdgpu_kernel void @agent_release() {
 define amdgpu_kernel void @agent_acq_rel() {
   ; CHECK-LABEL: name: agent_acq_rel
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 6, 7
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 6, 7
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("agent") acq_rel
   ret void
 }
@@ -282,8 +282,8 @@ define amdgpu_kernel void @agent_acq_rel() {
 define amdgpu_kernel void @agent_seq_cst() {
   ; CHECK-LABEL: name: agent_seq_cst
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 7, 7
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 7, 7
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("agent") seq_cst
   ret void
 }
@@ -291,8 +291,8 @@ define amdgpu_kernel void @agent_seq_cst() {
 define amdgpu_kernel void @workgroup_acquire() {
   ; CHECK-LABEL: name: workgroup_acquire
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 4, 8
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 4, 8
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("workgroup") acquire
   ret void
 }
@@ -300,8 +300,8 @@ define amdgpu_kernel void @workgroup_acquire() {
 define amdgpu_kernel void @workgroup_release() {
   ; CHECK-LABEL: name: workgroup_release
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 5, 8
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 5, 8
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("workgroup") release
   ret void
 }
@@ -309,8 +309,8 @@ define amdgpu_kernel void @workgroup_release() {
 define amdgpu_kernel void @workgroup_acq_rel() {
   ; CHECK-LABEL: name: workgroup_acq_rel
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 6, 8
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 6, 8
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("workgroup") acq_rel
   ret void
 }
@@ -318,8 +318,8 @@ define amdgpu_kernel void @workgroup_acq_rel() {
 define amdgpu_kernel void @workgroup_seq_cst() {
   ; CHECK-LABEL: name: workgroup_seq_cst
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 7, 8
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 7, 8
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("workgroup") seq_cst
   ret void
 }
@@ -327,8 +327,8 @@ define amdgpu_kernel void @workgroup_seq_cst() {
 define amdgpu_kernel void @wavefront_acquire() {
   ; CHECK-LABEL: name: wavefront_acquire
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 4, 9
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 4, 9
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("wavefront") acquire
   ret void
 }
@@ -336,8 +336,8 @@ define amdgpu_kernel void @wavefront_acquire() {
 define amdgpu_kernel void @wavefront_release() {
   ; CHECK-LABEL: name: wavefront_release
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 5, 9
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 5, 9
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("wavefront") release
   ret void
 }
@@ -345,8 +345,8 @@ define amdgpu_kernel void @wavefront_release() {
 define amdgpu_kernel void @wavefront_acq_rel() {
   ; CHECK-LABEL: name: wavefront_acq_rel
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 6, 9
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 6, 9
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("wavefront") acq_rel
   ret void
 }
@@ -354,8 +354,8 @@ define amdgpu_kernel void @wavefront_acq_rel() {
 define amdgpu_kernel void @wavefront_seq_cst() {
   ; CHECK-LABEL: name: wavefront_seq_cst
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   G_FENCE 7, 9
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   G_FENCE 7, 9
+  ; CHECK-NEXT:   S_ENDPGM 0
   fence syncscope("wavefront") seq_cst
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll
index 3b07fa4828e4..ceba50dc1b7b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll
@@ -7,45 +7,46 @@
 define void @void_a31i32_i32([31 x i32] %arg0, i32 %arg1) {
   ; FIXED-LABEL: name: void_a31i32_i32
   ; FIXED: bb.1 (%ir-block.0):
-  ; FIXED:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; FIXED:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; FIXED:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; FIXED:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; FIXED:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; FIXED:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; FIXED:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; FIXED:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; FIXED:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; FIXED:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; FIXED:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; FIXED:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; FIXED:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; FIXED:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; FIXED:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; FIXED:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; FIXED:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; FIXED:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; FIXED:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; FIXED:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; FIXED:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; FIXED:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; FIXED:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; FIXED:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; FIXED:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; FIXED:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; FIXED:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; FIXED:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; FIXED:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; FIXED:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; FIXED:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; FIXED:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; FIXED:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; FIXED:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
-  ; FIXED:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; FIXED:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; FIXED:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; FIXED:   [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
-  ; FIXED:   S_SETPC_B64_return [[COPY32]]
+  ; FIXED-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; FIXED-NEXT: {{  $}}
+  ; FIXED-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; FIXED-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; FIXED-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; FIXED-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; FIXED-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; FIXED-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; FIXED-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; FIXED-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; FIXED-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; FIXED-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; FIXED-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; FIXED-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; FIXED-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; FIXED-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; FIXED-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; FIXED-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; FIXED-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; FIXED-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; FIXED-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; FIXED-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; FIXED-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; FIXED-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; FIXED-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; FIXED-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; FIXED-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; FIXED-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; FIXED-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; FIXED-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; FIXED-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; FIXED-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; FIXED-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; FIXED-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; FIXED-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
+  ; FIXED-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; FIXED-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; FIXED-NEXT:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; FIXED-NEXT:   [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
+  ; FIXED-NEXT:   S_SETPC_B64_return [[COPY32]]
   store i32 %arg1, i32 addrspace(1)* undef
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
index c9c5dd7c3e57..70a41f9dafaa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
@@ -8,13 +8,14 @@
 define void @void_func_empty_arg({} %arg0, i32 %arg1) #0 {
   ; CHECK-LABEL: name: void_func_empty_arg
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   store i32 %arg1, i32 addrspace(1)* undef
   ret void
 }
@@ -22,13 +23,14 @@ define void @void_func_empty_arg({} %arg0, i32 %arg1) #0 {
 define void @void_func_empty_array([0 x i8] %arg0, i32 %arg1) #0 {
   ; CHECK-LABEL: name: void_func_empty_array
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   store i32 %arg1, i32 addrspace(1)* undef
   ret void
 }
@@ -36,14 +38,15 @@ define void @void_func_empty_array([0 x i8] %arg0, i32 %arg1) #0 {
 define void @void_func_i1(i1 %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i1
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store (s1) into `i1 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store (s1) into `i1 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   store i1 %arg0, i1 addrspace(1)* undef
   ret void
 }
@@ -51,18 +54,19 @@ define void @void_func_i1(i1 %arg0) #0 {
 define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i1_zeroext
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 1
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1)
-  ; CHECK:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]]
-  ; CHECK:   G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 1
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1)
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]]
+  ; CHECK-NEXT:   G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   %ext = zext i1 %arg0 to i32
   %add = add i32 %ext, 12
   store i32 %add, i32 addrspace(1)* undef
@@ -72,18 +76,19 @@ define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 {
 define void @void_func_i1_signext(i1 signext %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i1_signext
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 1
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1)
-  ; CHECK:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]]
-  ; CHECK:   G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 1
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1)
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]]
+  ; CHECK-NEXT:   G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   %ext = sext i1 %arg0 to i32
   %add = add i32 %ext, 12
   store i32 %add, i32 addrspace(1)* undef
@@ -93,26 +98,30 @@ define void @void_func_i1_signext(i1 signext %arg0) #0 {
 define void @i1_arg_i1_use(i1 %arg) #0 {
   ; CHECK-LABEL: name: i1_arg_i1_use
   ; CHECK: bb.1.bb:
-  ; CHECK:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[XOR:%[0-9]+]]:_(s1) = G_XOR [[TRUNC]], [[C]]
-  ; CHECK:   [[INT:%[0-9]+]]:_(s1), [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), [[XOR]](s1)
-  ; CHECK:   G_BRCOND [[INT]](s1), %bb.2
-  ; CHECK:   G_BR %bb.3
-  ; CHECK: bb.2.bb1:
-  ; CHECK:   successors: %bb.3(0x80000000)
-  ; CHECK:   G_STORE [[C1]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_BR %bb.3
-  ; CHECK: bb.3.bb2:
-  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s64)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[XOR:%[0-9]+]]:_(s1) = G_XOR [[TRUNC]], [[C]]
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(s1), [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), [[XOR]](s1)
+  ; CHECK-NEXT:   G_BRCOND [[INT]](s1), %bb.2
+  ; CHECK-NEXT:   G_BR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.bb1:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   G_STORE [[C1]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_BR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.bb2:
+  ; CHECK-NEXT:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s64)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
 bb:
   br i1 %arg, label %bb2, label %bb1
 
@@ -127,15 +136,16 @@ bb2:
 define void @void_func_i8(i8 %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   store i8 %arg0, i8 addrspace(1)* undef
   ret void
 }
@@ -143,18 +153,19 @@ define void @void_func_i8(i8 %arg0) #0 {
 define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i8_zeroext
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 8
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8)
-  ; CHECK:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]]
-  ; CHECK:   G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 8
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8)
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]]
+  ; CHECK-NEXT:   G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   %ext = zext i8 %arg0 to i32
   %add = add i32 %ext, 12
   store i32 %add, i32 addrspace(1)* undef
@@ -164,18 +175,19 @@ define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
 define void @void_func_i8_signext(i8 signext %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i8_signext
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 8
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8)
-  ; CHECK:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]]
-  ; CHECK:   G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 8
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8)
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]]
+  ; CHECK-NEXT:   G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   %ext = sext i8 %arg0 to i32
   %add = add i32 %ext, 12
   store i32 %add, i32 addrspace(1)* undef
@@ -185,14 +197,15 @@ define void @void_func_i8_signext(i8 signext %arg0) #0 {
 define void @void_func_i16(i16 %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `i16 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `i16 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   store i16 %arg0, i16 addrspace(1)* undef
   ret void
 }
@@ -200,18 +213,19 @@ define void @void_func_i16(i16 %arg0) #0 {
 define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i16_zeroext
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
-  ; CHECK:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]]
-  ; CHECK:   G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]]
+  ; CHECK-NEXT:   G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   %ext = zext i16 %arg0 to i32
   %add = add i32 %ext, 12
   store i32 %add, i32 addrspace(1)* undef
@@ -221,18 +235,19 @@ define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {
 define void @void_func_i16_signext(i16 signext %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i16_signext
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
-  ; CHECK:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]]
-  ; CHECK:   G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]]
+  ; CHECK-NEXT:   G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   %ext = sext i16 %arg0 to i32
   %add = add i32 %ext, 12
   store i32 %add, i32 addrspace(1)* undef
@@ -242,14 +257,15 @@ define void @void_func_i16_signext(i16 signext %arg0) #0 {
 define void @void_func_i24(i24 %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i24
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   store i24 %arg0, i24 addrspace(1)* undef
   ret void
 }
@@ -257,15 +273,16 @@ define void @void_func_i24(i24 %arg0) #0 {
 define void @void_func_i24_zeroext(i24 zeroext %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i24_zeroext
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 24
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_ZEXT]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 24
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_ZEXT]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   store i24 %arg0, i24 addrspace(1)* undef
   ret void
 }
@@ -273,15 +290,16 @@ define void @void_func_i24_zeroext(i24 zeroext %arg0) #0 {
 define void @void_func_i24_signext(i24 signext %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i24_signext
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 24
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_SEXT]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 24
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_SEXT]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   store i24 %arg0, i24 addrspace(1)* undef
   ret void
 }
@@ -289,13 +307,14 @@ define void @void_func_i24_signext(i24 signext %arg0) #0 {
 define void @void_func_i32(i32 %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   store i32 %arg0, i32 addrspace(1)* undef
   ret void
 }
@@ -304,13 +323,14 @@ define void @void_func_i32(i32 %arg0) #0 {
 define void @void_func_i32_signext(i32 signext %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i32_signext
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   store i32 %arg0, i32 addrspace(1)* undef
   ret void
 }
@@ -319,13 +339,14 @@ define void @void_func_i32_signext(i32 signext %arg0) #0 {
 define void @void_func_i32_zeroext(i32 zeroext %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i32_zeroext
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   store i32 %arg0, i32 addrspace(1)* undef
   ret void
 }
@@ -333,13 +354,14 @@ define void @void_func_i32_zeroext(i32 zeroext %arg0) #0 {
 define void @void_func_p3i8(i8 addrspace(3)* %arg0) #0 {
   ; CHECK-LABEL: name: void_func_p3i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[COPY]](p3), [[DEF]](p1) :: (store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[COPY]](p3), [[DEF]](p1) :: (store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   store i8 addrspace(3)* %arg0, i8 addrspace(3)* addrspace(1)* undef
   ret void
 }
@@ -347,16 +369,17 @@ define void @void_func_p3i8(i8 addrspace(3)* %arg0) #0 {
 define void @void_func_i48(i48 %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i48
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (store (s48) into `i48 addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (store (s48) into `i48 addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   store i48 %arg0, i48 addrspace(1)* undef
   ret void
 }
@@ -364,19 +387,20 @@ define void @void_func_i48(i48 %arg0) #0 {
 define void @void_func_i48_zeroext(i48 zeroext %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i48_zeroext
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
-  ; CHECK:   [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ZEXT]], [[C]]
-  ; CHECK:   G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ZEXT]], [[C]]
+  ; CHECK-NEXT:   G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   %ext = zext i48 %arg0 to i64
   %add = add i64 %ext, 12
   store i64 %add, i64 addrspace(1)* undef
@@ -386,19 +410,20 @@ define void @void_func_i48_zeroext(i48 zeroext %arg0) #0 {
 define void @void_func_i48_signext(i48 signext %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i48_signext
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48)
-  ; CHECK:   [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SEXT]], [[C]]
-  ; CHECK:   G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48)
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SEXT]], [[C]]
+  ; CHECK-NEXT:   G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   %ext = sext i48 %arg0 to i64
   %add = add i64 %ext, 12
   store i64 %add, i64 addrspace(1)* undef
@@ -408,15 +433,16 @@ define void @void_func_i48_signext(i48 signext %arg0) #0 {
 define void @void_func_i64(i64 %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   store i64 %arg0, i64 addrspace(1)* undef
   ret void
 }
@@ -424,17 +450,18 @@ define void @void_func_i64(i64 %arg0) #0 {
 define void @void_func_i95(i95 %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i95
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96)
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[TRUNC]](s95), [[DEF]](p1) :: (store (s95) into `i95 addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[TRUNC]](s95), [[DEF]](p1) :: (store (s95) into `i95 addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   store i95 %arg0, i95 addrspace(1)* undef
   ret void
 }
@@ -442,20 +469,21 @@ define void @void_func_i95(i95 %arg0) #0 {
 define void @void_func_i95_zeroext(i95 zeroext %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i95_zeroext
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96)
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[TRUNC]](s95)
-  ; CHECK:   [[ADD:%[0-9]+]]:_(s96) = G_ADD [[ZEXT]], [[C]]
-  ; CHECK:   G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[TRUNC]](s95)
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s96) = G_ADD [[ZEXT]], [[C]]
+  ; CHECK-NEXT:   G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   %ext = zext i95 %arg0 to i96
   %add = add i96 %ext, 12
   store i96 %add, i96 addrspace(1)* undef
@@ -465,20 +493,21 @@ define void @void_func_i95_zeroext(i95 zeroext %arg0) #0 {
 define void @void_func_i95_signext(i95 signext %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i95_signext
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96)
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[TRUNC]](s95)
-  ; CHECK:   [[ADD:%[0-9]+]]:_(s96) = G_ADD [[SEXT]], [[C]]
-  ; CHECK:   G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[TRUNC]](s95)
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s96) = G_ADD [[SEXT]], [[C]]
+  ; CHECK-NEXT:   G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   %ext = sext i95 %arg0 to i96
   %add = add i96 %ext, 12
   store i96 %add, i96 addrspace(1)* undef
@@ -488,16 +517,17 @@ define void @void_func_i95_signext(i95 signext %arg0) #0 {
 define void @void_func_i96(i96 %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i96
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[MV]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[MV]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   store i96 %arg0, i96 addrspace(1)* undef
   ret void
 }
@@ -505,15 +535,16 @@ define void @void_func_i96(i96 %arg0) #0 {
 define void @void_func_p0i8(i8* %arg0) #0 {
   ; CHECK-LABEL: name: void_func_p0i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[MV]](p0), [[DEF]](p1) :: (store (p0) into `i8* addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[MV]](p0), [[DEF]](p1) :: (store (p0) into `i8* addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   store i8* %arg0, i8* addrspace(1)* undef
   ret void
 }
@@ -521,15 +552,16 @@ define void @void_func_p0i8(i8* %arg0) #0 {
 define void @void_func_p1i8(i8 addrspace(1)* %arg0) #0 {
   ; CHECK-LABEL: name: void_func_p1i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[MV]](p1), [[DEF]](p1) :: (store (p1) into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[MV]](p1), [[DEF]](p1) :: (store (p1) into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   store i8 addrspace(1)* %arg0, i8 addrspace(1)* addrspace(1)* undef
   ret void
 }
@@ -537,14 +569,15 @@ define void @void_func_p1i8(i8 addrspace(1)* %arg0) #0 {
 define void @void_func_f16(half %arg0) #0 {
   ; CHECK-LABEL: name: void_func_f16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `half addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `half addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   store half %arg0, half addrspace(1)* undef
   ret void
 }
@@ -552,13 +585,14 @@ define void @void_func_f16(half %arg0) #0 {
 define void @void_func_f32(float %arg0) #0 {
   ; CHECK-LABEL: name: void_func_f32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `float addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `float addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   store float %arg0, float addrspace(1)* undef
   ret void
 }
@@ -566,15 +600,16 @@ define void @void_func_f32(float %arg0) #0 {
 define void @void_func_f64(double %arg0) #0 {
   ; CHECK-LABEL: name: void_func_f64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `double addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `double addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   store double %arg0, double addrspace(1)* undef
   ret void
 }
@@ -582,15 +617,16 @@ define void @void_func_f64(double %arg0) #0 {
 define void @void_func_v2i32(<2 x i32> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v2i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   store <2 x i32> %arg0, <2 x i32> addrspace(1)* undef
   ret void
 }
@@ -598,16 +634,17 @@ define void @void_func_v2i32(<2 x i32> %arg0) #0 {
 define void @void_func_v2i24(<2 x i24> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v2i24
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(<2 x s24>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[TRUNC]](<2 x s24>), [[DEF]](p1) :: (store (<2 x s24>) into `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(<2 x s24>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[TRUNC]](<2 x s24>), [[DEF]](p1) :: (store (<2 x s24>) into `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   store <2 x i24> %arg0, <2 x i24> addrspace(1)* undef
   ret void
 }
@@ -615,17 +652,18 @@ define void @void_func_v2i24(<2 x i24> %arg0) #0 {
 define void @void_func_v3i24(<3 x i24> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v3i24
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(<3 x s24>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[TRUNC]](<3 x s24>), [[DEF]](p1) :: (store (<3 x s24>) into `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(<3 x s24>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[TRUNC]](<3 x s24>), [[DEF]](p1) :: (store (<3 x s24>) into `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   store <3 x i24> %arg0, <3 x i24> addrspace(1)* undef
   ret void
 }
@@ -633,18 +671,19 @@ define void @void_func_v3i24(<3 x i24> %arg0) #0 {
 define void @void_func_v2i8(<2 x i8> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v2i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
-  ; CHECK:   [[TRUNC2:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s16>)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[TRUNC2]](<2 x s8>), [[DEF]](p1) :: (store (<2 x s8>) into `<2 x i8> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+  ; CHECK-NEXT:   [[TRUNC2:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s16>)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[TRUNC2]](<2 x s8>), [[DEF]](p1) :: (store (<2 x s8>) into `<2 x i8> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   store <2 x i8> %arg0, <2 x i8> addrspace(1)* undef
   ret void
 }
@@ -652,20 +691,21 @@ define void @void_func_v2i8(<2 x i8> %arg0) #0 {
 define void @void_func_v3i8(<3 x i8> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v3i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16)
-  ; CHECK:   [[TRUNC3:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s16>)
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[TRUNC3]](<3 x s8>), [[DEF]](p1) :: (store (<3 x s8>) into `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16)
+  ; CHECK-NEXT:   [[TRUNC3:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s16>)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[TRUNC3]](<3 x s8>), [[DEF]](p1) :: (store (<3 x s8>) into `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   store <3 x i8> %arg0, <3 x i8> addrspace(1)* undef
   ret void
 }
@@ -673,22 +713,23 @@ define void @void_func_v3i8(<3 x i8> %arg0) #0 {
 define void @void_func_v4i8(<4 x i8> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v4i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
-  ; CHECK:   [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s16>)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[TRUNC4]](<4 x s8>), [[DEF]](p1) :: (store (<4 x s8>) into `<4 x i8> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
+  ; CHECK-NEXT:   [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s16>)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[TRUNC4]](<4 x s8>), [[DEF]](p1) :: (store (<4 x s8>) into `<4 x i8> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]]
   store <4 x i8> %arg0, <4 x i8> addrspace(1)* undef
   ret void
 }
@@ -696,15 +737,16 @@ define void @void_func_v4i8(<4 x i8> %arg0) #0 {
 define void @void_func_v2p3i8(<2 x i8 addrspace(3)*> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v2p3i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY]](p3), [[COPY1]](p3)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY]](p3), [[COPY1]](p3)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   store <2 x i8 addrspace(3)*> %arg0, <2 x i8 addrspace(3)*> addrspace(1)* undef
   ret void
 }
@@ -712,16 +754,17 @@ define void @void_func_v2p3i8(<2 x i8 addrspace(3)*> %arg0) #0 {
 define void @void_func_v3i32(<3 x i32> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v3i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   store <3 x i32> %arg0, <3 x i32> addrspace(1)* undef
   ret void
 }
@@ -729,17 +772,18 @@ define void @void_func_v3i32(<3 x i32> %arg0) #0 {
 define void @void_func_v4i32(<4 x i32> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v4i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]]
   store <4 x i32> %arg0, <4 x i32> addrspace(1)* undef
   ret void
 }
@@ -747,18 +791,19 @@ define void @void_func_v4i32(<4 x i32> %arg0) #0 {
 define void @void_func_v5i32(<5 x i32> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v5i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store (<5 x s32>) into `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1)
-  ; CHECK:   [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]]
-  ; CHECK:   S_SETPC_B64_return [[COPY6]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store (<5 x s32>) into `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1)
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY6]]
   store <5 x i32> %arg0, <5 x i32> addrspace(1)* undef
   ret void
 }
@@ -766,21 +811,22 @@ define void @void_func_v5i32(<5 x i32> %arg0) #0 {
 define void @void_func_v8i32(<8 x i32> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v8i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
-  ; CHECK:   S_SETPC_B64_return [[COPY9]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY9]]
   store <8 x i32> %arg0, <8 x i32> addrspace(1)* undef
   ret void
 }
@@ -788,29 +834,30 @@ define void @void_func_v8i32(<8 x i32> %arg0) #0 {
 define void @void_func_v16i32(<16 x i32> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v16i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)
-  ; CHECK:   [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]]
-  ; CHECK:   S_SETPC_B64_return [[COPY17]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY17]]
   store <16 x i32> %arg0, <16 x i32> addrspace(1)* undef
   ret void
 }
@@ -818,46 +865,47 @@ define void @void_func_v16i32(<16 x i32> %arg0) #0 {
 define void @void_func_v32i32(<32 x i32> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v32i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; CHECK:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; CHECK:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; CHECK:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; CHECK:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; CHECK:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; CHECK:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; CHECK:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; CHECK:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; CHECK:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
-  ; CHECK:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
-  ; CHECK:   S_SETPC_B64_return [[COPY32]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; CHECK-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; CHECK-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; CHECK-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; CHECK-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; CHECK-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; CHECK-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; CHECK-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; CHECK-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
+  ; CHECK-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY32]]
   store <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
   ret void
 }
@@ -866,48 +914,49 @@ define void @void_func_v32i32(<32 x i32> %arg0) #0 {
 define void @void_func_v33i32(<33 x i32> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v33i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; CHECK:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; CHECK:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; CHECK:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; CHECK:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; CHECK:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; CHECK:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; CHECK:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; CHECK:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; CHECK:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.1, align 16, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32), [[LOAD1]](s32)
-  ; CHECK:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 256, addrspace 1)
-  ; CHECK:   [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
-  ; CHECK:   S_SETPC_B64_return [[COPY32]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; CHECK-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; CHECK-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; CHECK-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; CHECK-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; CHECK-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; CHECK-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; CHECK-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; CHECK-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.1, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32), [[LOAD1]](s32)
+  ; CHECK-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 256, addrspace 1)
+  ; CHECK-NEXT:   [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY32]]
   store <33 x i32> %arg0, <33 x i32> addrspace(1)* undef
   ret void
 }
@@ -915,19 +964,20 @@ define void @void_func_v33i32(<33 x i32> %arg0) #0 {
 define void @void_func_v2i64(<2 x i64> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v2i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]]
   store <2 x i64> %arg0, <2 x i64> addrspace(1)* undef
   ret void
 }
@@ -935,19 +985,20 @@ define void @void_func_v2i64(<2 x i64> %arg0) #0 {
 define void @void_func_v2p0i8(<2 x i8*> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v2p0i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[MV]](p0), [[MV1]](p0)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<2 x p0>), [[DEF]](p1) :: (store (<2 x p0>) into `<2 x i8*> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[MV]](p0), [[MV1]](p0)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<2 x p0>), [[DEF]](p1) :: (store (<2 x p0>) into `<2 x i8*> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]]
   store <2 x i8*> %arg0, <2 x i8*> addrspace(1)* undef
   ret void
 }
@@ -955,19 +1006,20 @@ define void @void_func_v2p0i8(<2 x i8*> %arg0) #0 {
 define void @void_func_v2p1i8(<2 x i8 addrspace(1)*> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v2p1i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]]
   store <2 x i8 addrspace(1)*> %arg0, <2 x i8 addrspace(1)*> addrspace(1)* undef
   ret void
 }
@@ -975,22 +1027,23 @@ define void @void_func_v2p1i8(<2 x i8 addrspace(1)*> %arg0) #0 {
 define void @void_func_v3i64(<3 x i64> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v3i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x i64> addrspace(1)* undef`, align 32, addrspace 1)
-  ; CHECK:   [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
-  ; CHECK:   S_SETPC_B64_return [[COPY7]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x i64> addrspace(1)* undef`, align 32, addrspace 1)
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY7]]
   store <3 x i64> %arg0, <3 x i64> addrspace(1)* undef
   ret void
 }
@@ -998,25 +1051,26 @@ define void @void_func_v3i64(<3 x i64> %arg0) #0 {
 define void @void_func_v4i64(<4 x i64> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v4i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
-  ; CHECK:   [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
-  ; CHECK:   S_SETPC_B64_return [[COPY9]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+  ; CHECK-NEXT:   [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY9]]
   store <4 x i64> %arg0, <4 x i64> addrspace(1)* undef
   ret void
 }
@@ -1024,28 +1078,29 @@ define void @void_func_v4i64(<4 x i64> %arg0) #0 {
 define void @void_func_v5i64(<5 x i64> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v5i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
-  ; CHECK:   [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
-  ; CHECK:   [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64)
-  ; CHECK:   [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store (<5 x s64>) into `<5 x i64> addrspace(1)* undef`, align 64, addrspace 1)
-  ; CHECK:   [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]]
-  ; CHECK:   S_SETPC_B64_return [[COPY11]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+  ; CHECK-NEXT:   [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+  ; CHECK-NEXT:   [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store (<5 x s64>) into `<5 x i64> addrspace(1)* undef`, align 64, addrspace 1)
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY11]]
   store <5 x i64> %arg0, <5 x i64> addrspace(1)* undef
   ret void
 }
@@ -1053,37 +1108,38 @@ define void @void_func_v5i64(<5 x i64> %arg0) #0 {
 define void @void_func_v8i64(<8 x i64> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v8i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
-  ; CHECK:   [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
-  ; CHECK:   [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
-  ; CHECK:   [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
-  ; CHECK:   [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
-  ; CHECK:   [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
-  ; CHECK:   [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x i64> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]]
-  ; CHECK:   S_SETPC_B64_return [[COPY17]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+  ; CHECK-NEXT:   [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+  ; CHECK-NEXT:   [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+  ; CHECK-NEXT:   [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+  ; CHECK-NEXT:   [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+  ; CHECK-NEXT:   [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x i64> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY17]]
   store <8 x i64> %arg0, <8 x i64> addrspace(1)* undef
   ret void
 }
@@ -1091,62 +1147,63 @@ define void @void_func_v8i64(<8 x i64> %arg0) #0 {
 define void @void_func_v16i64(<16 x i64> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v16i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; CHECK:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; CHECK:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; CHECK:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; CHECK:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; CHECK:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; CHECK:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; CHECK:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; CHECK:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; CHECK:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
-  ; CHECK:   [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
-  ; CHECK:   [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
-  ; CHECK:   [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
-  ; CHECK:   [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
-  ; CHECK:   [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
-  ; CHECK:   [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
-  ; CHECK:   [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
-  ; CHECK:   [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
-  ; CHECK:   [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
-  ; CHECK:   [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32)
-  ; CHECK:   [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32)
-  ; CHECK:   [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32)
-  ; CHECK:   [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64)
-  ; CHECK:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x i64> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
-  ; CHECK:   S_SETPC_B64_return [[COPY32]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; CHECK-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; CHECK-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; CHECK-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; CHECK-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; CHECK-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; CHECK-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; CHECK-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; CHECK-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+  ; CHECK-NEXT:   [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+  ; CHECK-NEXT:   [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+  ; CHECK-NEXT:   [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+  ; CHECK-NEXT:   [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+  ; CHECK-NEXT:   [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+  ; CHECK-NEXT:   [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+  ; CHECK-NEXT:   [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
+  ; CHECK-NEXT:   [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
+  ; CHECK-NEXT:   [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
+  ; CHECK-NEXT:   [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32)
+  ; CHECK-NEXT:   [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32)
+  ; CHECK-NEXT:   [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32)
+  ; CHECK-NEXT:   [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64)
+  ; CHECK-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x i64> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY32]]
   store <16 x i64> %arg0, <16 x i64> addrspace(1)* undef
   ret void
 }
@@ -1154,13 +1211,14 @@ define void @void_func_v16i64(<16 x i64> %arg0) #0 {
 define void @void_func_v2i16(<2 x i16> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v2i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   store <2 x i16> %arg0, <2 x i16> addrspace(1)* undef
   ret void
 }
@@ -1168,17 +1226,18 @@ define void @void_func_v2i16(<2 x i16> %arg0) #0 {
 define void @void_func_v3i16(<3 x i16> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v3i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; CHECK:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
-  ; CHECK:   [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef
   ret void
 }
@@ -1186,15 +1245,16 @@ define void @void_func_v3i16(<3 x i16> %arg0) #0 {
 define void @void_func_v4i16(<4 x i16> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v4i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x i16> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x i16> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   store <4 x i16> %arg0, <4 x i16> addrspace(1)* undef
   ret void
 }
@@ -1202,18 +1262,19 @@ define void @void_func_v4i16(<4 x i16> %arg0) #0 {
 define void @void_func_v5i16(<5 x i16> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v5i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-  ; CHECK:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
-  ; CHECK:   [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>)
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[UV]](<5 x s16>), [[DEF1]](p1) :: (store (<5 x s16>) into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[UV]](<5 x s16>), [[DEF1]](p1) :: (store (<5 x s16>) into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef
   ret void
 }
@@ -1221,17 +1282,18 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 {
 define void @void_func_v8i16(<8 x i16> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v8i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
-  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x i16> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x i16> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]]
   store <8 x i16> %arg0, <8 x i16> addrspace(1)* undef
   ret void
 }
@@ -1239,21 +1301,22 @@ define void @void_func_v8i16(<8 x i16> %arg0) #0 {
 define void @void_func_v16i16(<16 x i16> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v16i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7
-  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>)
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x i16> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
-  ; CHECK:   S_SETPC_B64_return [[COPY9]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>)
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x i16> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY9]]
   store <16 x i16> %arg0, <16 x i16> addrspace(1)* undef
   ret void
 }
@@ -1263,50 +1326,51 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 {
 define void @void_func_v65i16(<65 x i16> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v65i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr9
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr10
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr11
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr12
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr13
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr14
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr15
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr16
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr17
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr18
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr19
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr20
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr21
-  ; CHECK:   [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr22
-  ; CHECK:   [[COPY23:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr23
-  ; CHECK:   [[COPY24:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr24
-  ; CHECK:   [[COPY25:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr25
-  ; CHECK:   [[COPY26:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr26
-  ; CHECK:   [[COPY27:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr27
-  ; CHECK:   [[COPY28:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr28
-  ; CHECK:   [[COPY29:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr29
-  ; CHECK:   [[COPY30:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr30
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, align 16, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, addrspace 5)
-  ; CHECK:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
-  ; CHECK:   [[UV:%[0-9]+]]:_(<65 x s16>), [[UV1:%[0-9]+]]:_(<65 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>)
-  ; CHECK:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[UV]](<65 x s16>), [[DEF1]](p1) :: (store (<65 x s16>) into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1)
-  ; CHECK:   [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
-  ; CHECK:   S_SETPC_B64_return [[COPY32]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr15
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr16
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr17
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr18
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr19
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr20
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr21
+  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr22
+  ; CHECK-NEXT:   [[COPY23:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr23
+  ; CHECK-NEXT:   [[COPY24:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr24
+  ; CHECK-NEXT:   [[COPY25:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr25
+  ; CHECK-NEXT:   [[COPY26:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr26
+  ; CHECK-NEXT:   [[COPY27:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr27
+  ; CHECK-NEXT:   [[COPY28:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr28
+  ; CHECK-NEXT:   [[COPY29:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr29
+  ; CHECK-NEXT:   [[COPY30:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr30
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, addrspace 5)
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(<65 x s16>), [[UV1:%[0-9]+]]:_(<65 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>)
+  ; CHECK-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[UV]](<65 x s16>), [[DEF1]](p1) :: (store (<65 x s16>) into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1)
+  ; CHECK-NEXT:   [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY32]]
   store <65 x i16> %arg0, <65 x i16> addrspace(1)* undef
   ret void
 }
@@ -1314,15 +1378,16 @@ define void @void_func_v65i16(<65 x i16> %arg0) #0 {
 define void @void_func_v2f32(<2 x float> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v2f32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   store <2 x float> %arg0, <2 x float> addrspace(1)* undef
   ret void
 }
@@ -1330,16 +1395,17 @@ define void @void_func_v2f32(<2 x float> %arg0) #0 {
 define void @void_func_v3f32(<3 x float> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v3f32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   store <3 x float> %arg0, <3 x float> addrspace(1)* undef
   ret void
 }
@@ -1347,17 +1413,18 @@ define void @void_func_v3f32(<3 x float> %arg0) #0 {
 define void @void_func_v4f32(<4 x float> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v4f32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]]
   store <4 x float> %arg0, <4 x float> addrspace(1)* undef
   ret void
 }
@@ -1365,21 +1432,22 @@ define void @void_func_v4f32(<4 x float> %arg0) #0 {
 define void @void_func_v8f32(<8 x float> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v8f32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
-  ; CHECK:   S_SETPC_B64_return [[COPY9]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY9]]
   store <8 x float> %arg0, <8 x float> addrspace(1)* undef
   ret void
 }
@@ -1387,29 +1455,30 @@ define void @void_func_v8f32(<8 x float> %arg0) #0 {
 define void @void_func_v16f32(<16 x float> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v16f32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)
-  ; CHECK:   [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]]
-  ; CHECK:   S_SETPC_B64_return [[COPY17]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY17]]
   store <16 x float> %arg0, <16 x float> addrspace(1)* undef
   ret void
 }
@@ -1417,19 +1486,20 @@ define void @void_func_v16f32(<16 x float> %arg0) #0 {
 define void @void_func_v2f64(<2 x double> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v2f64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]]
   store <2 x double> %arg0, <2 x double> addrspace(1)* undef
   ret void
 }
@@ -1437,22 +1507,23 @@ define void @void_func_v2f64(<2 x double> %arg0) #0 {
 define void @void_func_v3f64(<3 x double> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v3f64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x double> addrspace(1)* undef`, align 32, addrspace 1)
-  ; CHECK:   [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
-  ; CHECK:   S_SETPC_B64_return [[COPY7]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x double> addrspace(1)* undef`, align 32, addrspace 1)
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY7]]
   store <3 x double> %arg0, <3 x double> addrspace(1)* undef
   ret void
 }
@@ -1460,25 +1531,26 @@ define void @void_func_v3f64(<3 x double> %arg0) #0 {
 define void @void_func_v4f64(<4 x double> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v4f64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
-  ; CHECK:   [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x double> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
-  ; CHECK:   S_SETPC_B64_return [[COPY9]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+  ; CHECK-NEXT:   [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x double> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY9]]
   store <4 x double> %arg0, <4 x double> addrspace(1)* undef
   ret void
 }
@@ -1486,37 +1558,38 @@ define void @void_func_v4f64(<4 x double> %arg0) #0 {
 define void @void_func_v8f64(<8 x double> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v8f64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
-  ; CHECK:   [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
-  ; CHECK:   [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
-  ; CHECK:   [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
-  ; CHECK:   [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
-  ; CHECK:   [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
-  ; CHECK:   [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x double> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]]
-  ; CHECK:   S_SETPC_B64_return [[COPY17]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+  ; CHECK-NEXT:   [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+  ; CHECK-NEXT:   [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+  ; CHECK-NEXT:   [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+  ; CHECK-NEXT:   [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+  ; CHECK-NEXT:   [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x double> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY17]]
   store <8 x double> %arg0, <8 x double> addrspace(1)* undef
   ret void
 }
@@ -1524,62 +1597,63 @@ define void @void_func_v8f64(<8 x double> %arg0) #0 {
 define void @void_func_v16f64(<16 x double> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v16f64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; CHECK:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; CHECK:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; CHECK:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; CHECK:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; CHECK:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; CHECK:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; CHECK:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; CHECK:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; CHECK:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
-  ; CHECK:   [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
-  ; CHECK:   [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
-  ; CHECK:   [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
-  ; CHECK:   [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
-  ; CHECK:   [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
-  ; CHECK:   [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
-  ; CHECK:   [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
-  ; CHECK:   [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
-  ; CHECK:   [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
-  ; CHECK:   [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32)
-  ; CHECK:   [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32)
-  ; CHECK:   [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32)
-  ; CHECK:   [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64)
-  ; CHECK:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x double> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
-  ; CHECK:   S_SETPC_B64_return [[COPY32]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; CHECK-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; CHECK-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; CHECK-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; CHECK-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; CHECK-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; CHECK-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; CHECK-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; CHECK-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+  ; CHECK-NEXT:   [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+  ; CHECK-NEXT:   [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+  ; CHECK-NEXT:   [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+  ; CHECK-NEXT:   [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+  ; CHECK-NEXT:   [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+  ; CHECK-NEXT:   [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+  ; CHECK-NEXT:   [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
+  ; CHECK-NEXT:   [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
+  ; CHECK-NEXT:   [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
+  ; CHECK-NEXT:   [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32)
+  ; CHECK-NEXT:   [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32)
+  ; CHECK-NEXT:   [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32)
+  ; CHECK-NEXT:   [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64)
+  ; CHECK-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x double> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY32]]
   store <16 x double> %arg0, <16 x double> addrspace(1)* undef
   ret void
 }
@@ -1587,13 +1661,14 @@ define void @void_func_v16f64(<16 x double> %arg0) #0 {
 define void @void_func_v2f16(<2 x half> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v2f16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   store <2 x half> %arg0, <2 x half> addrspace(1)* undef
   ret void
 }
@@ -1601,17 +1676,18 @@ define void @void_func_v2f16(<2 x half> %arg0) #0 {
 define void @void_func_v3f16(<3 x half> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v3f16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; CHECK:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
-  ; CHECK:   [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   store <3 x half> %arg0, <3 x half> addrspace(1)* undef
   ret void
 }
@@ -1619,15 +1695,16 @@ define void @void_func_v3f16(<3 x half> %arg0) #0 {
 define void @void_func_v4f16(<4 x half> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v4f16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x half> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x half> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   store <4 x half> %arg0, <4 x half> addrspace(1)* undef
   ret void
 }
@@ -1635,17 +1712,18 @@ define void @void_func_v4f16(<4 x half> %arg0) #0 {
 define void @void_func_v8f16(<8 x half> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v8f16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
-  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x half> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x half> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]]
   store <8 x half> %arg0, <8 x half> addrspace(1)* undef
   ret void
 }
@@ -1653,21 +1731,22 @@ define void @void_func_v8f16(<8 x half> %arg0) #0 {
 define void @void_func_v16f16(<16 x half> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v16f16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7
-  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>)
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x half> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
-  ; CHECK:   S_SETPC_B64_return [[COPY9]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>)
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x half> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY9]]
   store <16 x half> %arg0, <16 x half> addrspace(1)* undef
   ret void
 }
@@ -1676,20 +1755,21 @@ define void @void_func_v16f16(<16 x half> %arg0) #0 {
 define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 {
   ; CHECK-LABEL: name: void_func_i32_i64_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[MV]](s64), [[COPY5]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY6]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[MV]](s64), [[COPY5]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY6]]
   store volatile i32 %arg0, i32 addrspace(1)* undef
   store volatile i64 %arg1, i64 addrspace(1)* undef
   store volatile i32 %arg2, i32 addrspace(1)* undef
@@ -1699,13 +1779,14 @@ define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 {
 define void @void_func_struct_i32({ i32 } %arg0) #0 {
   ; CHECK-LABEL: name: void_func_struct_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `{ i32 } addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `{ i32 } addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   store { i32 } %arg0, { i32 } addrspace(1)* undef
   ret void
 }
@@ -1713,19 +1794,20 @@ define void @void_func_struct_i32({ i32 } %arg0) #0 {
 define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 {
   ; CHECK-LABEL: name: void_func_struct_i8_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64)
-  ; CHECK:   G_STORE [[COPY1]](s32), [[PTR_ADD]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64)
+  ; CHECK-NEXT:   G_STORE [[COPY1]](s32), [[PTR_ADD]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   store { i8, i32 } %arg0, { i8, i32 } addrspace(1)* undef
   ret void
 }
@@ -1733,21 +1815,22 @@ define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 {
 define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0) #0 {
   ; CHECK-LABEL: name: void_func_byval_struct_i8_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr30_sgpr31
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5)
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5)
-  ; CHECK:   G_STORE [[LOAD]](s8), [[DEF]](p1) :: (store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64)
-  ; CHECK:   G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT:   liveins: $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5)
+  ; CHECK-NEXT:   G_STORE [[LOAD]](s8), [[DEF]](p1) :: (store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64)
+  ; CHECK-NEXT:   G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
   %arg0.load = load { i8, i32 }, { i8, i32 } addrspace(5)* %arg0
   store { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef
   ret void
@@ -1756,32 +1839,33 @@ define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8,
 define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0, { i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg1, i32 %arg2) #0 {
   ; CHECK-LABEL: name: void_func_byval_struct_i8_i32_x2
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
-  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (volatile dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5)
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5)
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p5) :: (volatile dereferenceable load (s8) from %ir.arg1, align 4, addrspace 5)
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY1]], [[C]](s32)
-  ; CHECK:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile dereferenceable load (s32) from %ir.arg1 + 4, addrspace 5)
-  ; CHECK:   G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64)
-  ; CHECK:   G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (volatile store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
-  ; CHECK:   G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
-  ; CHECK:   [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64)
-  ; CHECK:   G_STORE [[LOAD3]](s32), [[PTR_ADD3]](p1) :: (volatile store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
-  ; CHECK:   G_STORE [[COPY2]](s32), [[DEF1]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (volatile dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p5) :: (volatile dereferenceable load (s8) from %ir.arg1, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY1]], [[C]](s32)
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile dereferenceable load (s32) from %ir.arg1 + 4, addrspace 5)
+  ; CHECK-NEXT:   G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64)
+  ; CHECK-NEXT:   G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (volatile store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
+  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64)
+  ; CHECK-NEXT:   G_STORE [[LOAD3]](s32), [[PTR_ADD3]](p1) :: (volatile store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[COPY2]](s32), [[DEF1]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   %arg0.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg0
   %arg1.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg1
   store volatile { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef
@@ -1793,20 +1877,21 @@ define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval({
 define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval(i32) %arg0, i64 addrspace(5)* byval(i64) %arg1) #0 {
   ; CHECK-LABEL: name: void_func_byval_i32_byval_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr30_sgpr31
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
-  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s64) from %ir.arg1, addrspace 5)
-  ; CHECK:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[LOAD1]](s64), [[COPY3]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s64) from %ir.arg1, addrspace 5)
+  ; CHECK-NEXT:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[LOAD1]](s64), [[COPY3]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   %arg0.load = load i32, i32 addrspace(5)* %arg0
   %arg1.load = load i64, i64 addrspace(5)* %arg1
   store i32 %arg0.load, i32 addrspace(1)* undef
@@ -1817,20 +1902,21 @@ define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval(i32) %arg0, i
 define void @void_func_byval_i8_align32_i16_align64(i8 addrspace(5)* byval(i8) %arg0, i16 addrspace(5)* byval(i16) align 64 %arg1) #0 {
   ; CHECK-LABEL: name: void_func_byval_i8_align32_i16_align64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr30_sgpr31
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
-  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(p1) = COPY [[C]](p1)
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, addrspace 5)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5)
-  ; CHECK:   G_STORE [[LOAD]](s8), [[C]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1)
-  ; CHECK:   G_STORE [[LOAD1]](s16), [[COPY3]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(p1) = COPY [[C]](p1)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, addrspace 5)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5)
+  ; CHECK-NEXT:   G_STORE [[LOAD]](s8), [[C]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[LOAD1]](s16), [[COPY3]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   %arg0.load = load i8, i8 addrspace(5)* %arg0
   %arg1.load = load i16, i16 addrspace(5)* %arg1
   store i8 %arg0.load, i8 addrspace(1)* null
@@ -1842,32 +1928,33 @@ define void @void_func_byval_i8_align32_i16_align64(i8 addrspace(5)* byval(i8) %
 define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byval([3 x i32]) align 128 %arg0, i16 addrspace(5)* byval(i16) align 64 %arg1) #0 {
   ; CHECK-LABEL: name: byval_a3i32_align128_byval_i16_align64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr30_sgpr31
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
-  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(p1) = COPY [[C]](p1)
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5)
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 8, addrspace 5)
-  ; CHECK:   [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5)
-  ; CHECK:   G_STORE [[LOAD]](s32), [[C]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null`, addrspace 1)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK:   [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C3]](s64)
-  ; CHECK:   G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null` + 4, addrspace 1)
-  ; CHECK:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; CHECK:   [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C4]](s64)
-  ; CHECK:   G_STORE [[LOAD2]](s32), [[PTR_ADD3]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null` + 8, addrspace 1)
-  ; CHECK:   G_STORE [[LOAD3]](s16), [[COPY3]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(p1) = COPY [[C]](p1)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 8, addrspace 5)
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5)
+  ; CHECK-NEXT:   G_STORE [[LOAD]](s32), [[C]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null`, addrspace 1)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C3]](s64)
+  ; CHECK-NEXT:   G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null` + 4, addrspace 1)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C4]](s64)
+  ; CHECK-NEXT:   G_STORE [[LOAD2]](s32), [[PTR_ADD3]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null` + 8, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[LOAD3]](s16), [[COPY3]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   %arg0.load = load [3 x i32], [3 x i32] addrspace(5)* %arg0
   %arg1.load = load i16, i16 addrspace(5)* %arg1
   store [3 x i32] %arg0.load, [3 x i32] addrspace(1)* null
@@ -1879,53 +1966,54 @@ define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byva
 define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, i8 addrspace(5)* byval(i8) align 8 %arg2) #0 {
   ; CHECK-LABEL: name: void_func_v32i32_i32_byval_i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; CHECK:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; CHECK:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; CHECK:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; CHECK:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; CHECK:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; CHECK:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; CHECK:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; CHECK:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; CHECK:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
-  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[COPY31:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX2]](p5)
-  ; CHECK:   [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-  ; CHECK:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[C]](p1)
-  ; CHECK:   G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1)
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg2, addrspace 5)
-  ; CHECK:   G_STORE [[LOAD2]](s8), [[COPY33]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1)
-  ; CHECK:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
-  ; CHECK:   S_SETPC_B64_return [[COPY34]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; CHECK-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; CHECK-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; CHECK-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; CHECK-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; CHECK-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; CHECK-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; CHECK-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; CHECK-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[COPY31:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX2]](p5)
+  ; CHECK-NEXT:   [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[C]](p1)
+  ; CHECK-NEXT:   G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg2, addrspace 5)
+  ; CHECK-NEXT:   G_STORE [[LOAD2]](s8), [[COPY33]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY34]]
   store i32 %arg1, i32 addrspace(1)* null
   %arg2.load = load i8, i8 addrspace(5)* %arg2
   store i8 %arg2.load, i8 addrspace(1)* null
@@ -1936,53 +2024,54 @@ define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, i8 addrs
 define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, i8 addrspace(5)* byval(i8) %arg1, i32 %arg2) #0 {
   ; CHECK-LABEL: name: void_func_v32i32_byval_i8_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; CHECK:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; CHECK:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; CHECK:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; CHECK:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; CHECK:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; CHECK:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; CHECK:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; CHECK:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; CHECK:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
-  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[COPY31:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
-  ; CHECK:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5)
-  ; CHECK:   [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-  ; CHECK:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[C]](p1)
-  ; CHECK:   G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1)
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg1, addrspace 5)
-  ; CHECK:   G_STORE [[LOAD2]](s8), [[COPY33]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1)
-  ; CHECK:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
-  ; CHECK:   S_SETPC_B64_return [[COPY34]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; CHECK-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; CHECK-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; CHECK-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; CHECK-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; CHECK-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; CHECK-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; CHECK-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; CHECK-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[COPY31:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
+  ; CHECK-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[C]](p1)
+  ; CHECK-NEXT:   G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg1, addrspace 5)
+  ; CHECK-NEXT:   G_STORE [[LOAD2]](s8), [[COPY33]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY34]]
   store i32 %arg2, i32 addrspace(1)* null
   %arg1.load = load i8, i8 addrspace(5)* %arg1
   store i8 %arg1.load, i8 addrspace(1)* null
@@ -1992,57 +2081,58 @@ define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, i8 addrspace(5)* by
 define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 {
   ; CHECK-LABEL: name: void_func_v32i32_i32_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; CHECK:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; CHECK:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; CHECK:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; CHECK:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; CHECK:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; CHECK:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; CHECK:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; CHECK:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; CHECK:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
-  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.1, align 8, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5)
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
-  ; CHECK:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[LOAD1]](s32), [[COPY32]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[MV]](s64), [[COPY33]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
-  ; CHECK:   S_SETPC_B64_return [[COPY34]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; CHECK-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; CHECK-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; CHECK-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; CHECK-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; CHECK-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; CHECK-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; CHECK-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; CHECK-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.1, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5)
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
+  ; CHECK-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[LOAD1]](s32), [[COPY32]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[MV]](s64), [[COPY33]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY34]]
   store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
   store volatile i32 %arg1, i32 addrspace(1)* undef
   store volatile i64 %arg2, i64 addrspace(1)* undef
@@ -2053,64 +2143,65 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0
 define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4) #0 {
   ; CHECK-LABEL: name: void_func_v32i32_i1_i8_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; CHECK:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; CHECK:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; CHECK:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; CHECK:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; CHECK:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; CHECK:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; CHECK:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; CHECK:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; CHECK:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
-  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s1) from %fixed-stack.3, align 4, addrspace 5)
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[LOAD1]](s32)
-  ; CHECK:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s16) from %fixed-stack.2, align 8, addrspace 5)
-  ; CHECK:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s16)
-  ; CHECK:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s16) from %fixed-stack.1, align 4, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s16) from %fixed-stack.0, align 16, addrspace 5)
-  ; CHECK:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   [[COPY35:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[TRUNC]](s1), [[COPY32]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[TRUNC1]](s8), [[COPY33]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[LOAD3]](s16), [[COPY34]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[LOAD4]](s16), [[COPY35]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY36:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
-  ; CHECK:   S_SETPC_B64_return [[COPY36]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; CHECK-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; CHECK-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; CHECK-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; CHECK-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; CHECK-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; CHECK-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; CHECK-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; CHECK-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s1) from %fixed-stack.3, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[LOAD1]](s32)
+  ; CHECK-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s16) from %fixed-stack.2, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s16)
+  ; CHECK-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s16) from %fixed-stack.1, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s16) from %fixed-stack.0, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   [[COPY35:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[TRUNC]](s1), [[COPY32]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[TRUNC1]](s8), [[COPY33]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[LOAD3]](s16), [[COPY34]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[LOAD4]](s16), [[COPY35]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY36:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY36]]
   store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
   store volatile i1 %arg1, i1 addrspace(1)* undef
   store volatile i8 %arg2, i8 addrspace(1)* undef
@@ -2122,54 +2213,55 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1
 define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, i8 addrspace(3)* %arg1, i8 addrspace(5)* %arg2) #0 {
   ; CHECK-LABEL: name: void_func_v32i32_p3_p5_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; CHECK:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; CHECK:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; CHECK:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; CHECK:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; CHECK:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; CHECK:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; CHECK:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; CHECK:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; CHECK:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
-  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (p3) from %fixed-stack.1, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (p5) from %fixed-stack.0, align 8, addrspace 5)
-  ; CHECK:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[LOAD1]](p3), [[COPY32]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[LOAD2]](p5), [[COPY33]](p1) :: (volatile store (p5) into `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
-  ; CHECK:   S_SETPC_B64_return [[COPY34]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; CHECK-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; CHECK-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; CHECK-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; CHECK-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; CHECK-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; CHECK-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; CHECK-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; CHECK-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (p3) from %fixed-stack.1, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (p5) from %fixed-stack.0, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[LOAD1]](p3), [[COPY32]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[LOAD2]](p5), [[COPY33]](p1) :: (volatile store (p5) into `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY34]]
   store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
   store volatile i8 addrspace(3)* %arg1, i8 addrspace(3)* addrspace(1)* undef
   store volatile i8 addrspace(5)* %arg2, i8 addrspace(5)* addrspace(1)* undef
@@ -2179,60 +2271,61 @@ define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, i8 addrspace(3)* %arg1
 define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 {
   ; CHECK-LABEL: name: void_func_v32i32_v2i32_v2f32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; CHECK:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; CHECK:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; CHECK:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; CHECK:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; CHECK:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; CHECK:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; CHECK:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; CHECK:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; CHECK:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
-  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32)
-  ; CHECK:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32)
-  ; CHECK:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[COPY32]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[BUILD_VECTOR2]](<2 x s32>), [[COPY33]](p1) :: (volatile store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
-  ; CHECK:   S_SETPC_B64_return [[COPY34]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; CHECK-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; CHECK-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; CHECK-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; CHECK-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; CHECK-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; CHECK-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; CHECK-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; CHECK-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32)
+  ; CHECK-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32)
+  ; CHECK-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[COPY32]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR2]](<2 x s32>), [[COPY33]](p1) :: (volatile store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY34]]
   store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
   store volatile <2 x i32> %arg1, <2 x i32> addrspace(1)* undef
   store volatile <2 x float> %arg2, <2 x float> addrspace(1)* undef
@@ -2242,54 +2335,55 @@ define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2
 define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2) #0 {
   ; CHECK-LABEL: name: void_func_v32i32_v2i16_v2f16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; CHECK:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; CHECK:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; CHECK:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; CHECK:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; CHECK:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; CHECK:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; CHECK:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; CHECK:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; CHECK:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
-  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, align 8, addrspace 5)
-  ; CHECK:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[LOAD1]](<2 x s16>), [[COPY32]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[LOAD2]](<2 x s16>), [[COPY33]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
-  ; CHECK:   S_SETPC_B64_return [[COPY34]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; CHECK-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; CHECK-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; CHECK-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; CHECK-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; CHECK-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; CHECK-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; CHECK-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; CHECK-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[LOAD1]](<2 x s16>), [[COPY32]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[LOAD2]](<2 x s16>), [[COPY33]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY34]]
   store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
   store volatile <2 x i16> %arg1, <2 x i16> addrspace(1)* undef
   store volatile <2 x half> %arg2, <2 x half> addrspace(1)* undef
@@ -2299,72 +2393,73 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2
 define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 {
   ; CHECK-LABEL: name: void_func_v32i32_v2i64_v2f64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; CHECK:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; CHECK:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; CHECK:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; CHECK:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; CHECK:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; CHECK:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; CHECK:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; CHECK:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; CHECK:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
-  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
-  ; CHECK:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
-  ; CHECK:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5)
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD1]](s32), [[LOAD2]](s32)
-  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD3]](s32), [[LOAD4]](s32)
-  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-  ; CHECK:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
-  ; CHECK:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
-  ; CHECK:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
-  ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD5]](s32), [[LOAD6]](s32)
-  ; CHECK:   [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD7]](s32), [[LOAD8]](s32)
-  ; CHECK:   [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64)
-  ; CHECK:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[COPY32]](p1) :: (volatile store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[COPY33]](p1) :: (volatile store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
-  ; CHECK:   S_SETPC_B64_return [[COPY34]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; CHECK-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; CHECK-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; CHECK-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; CHECK-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; CHECK-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; CHECK-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; CHECK-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; CHECK-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
+  ; CHECK-NEXT:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD1]](s32), [[LOAD2]](s32)
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD3]](s32), [[LOAD4]](s32)
+  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+  ; CHECK-NEXT:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+  ; CHECK-NEXT:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+  ; CHECK-NEXT:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD5]](s32), [[LOAD6]](s32)
+  ; CHECK-NEXT:   [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD7]](s32), [[LOAD8]](s32)
+  ; CHECK-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64)
+  ; CHECK-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[COPY32]](p1) :: (volatile store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[COPY33]](p1) :: (volatile store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY34]]
   store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
   store volatile <2 x i64> %arg1, <2 x i64> addrspace(1)* undef
   store volatile <2 x double> %arg2, <2 x double> addrspace(1)* undef
@@ -2374,68 +2469,69 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2
 define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 {
   ; CHECK-LABEL: name: void_func_v32i32_v4i32_v4f32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; CHECK:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; CHECK:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; CHECK:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; CHECK:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; CHECK:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; CHECK:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; CHECK:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; CHECK:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; CHECK:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
-  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
-  ; CHECK:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
-  ; CHECK:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32)
-  ; CHECK:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
-  ; CHECK:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
-  ; CHECK:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32)
-  ; CHECK:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[COPY32]](p1) :: (volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[COPY33]](p1) :: (volatile store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
-  ; CHECK:   S_SETPC_B64_return [[COPY34]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; CHECK-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; CHECK-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; CHECK-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; CHECK-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; CHECK-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; CHECK-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; CHECK-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; CHECK-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
+  ; CHECK-NEXT:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32)
+  ; CHECK-NEXT:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+  ; CHECK-NEXT:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+  ; CHECK-NEXT:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32)
+  ; CHECK-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[COPY32]](p1) :: (volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[COPY33]](p1) :: (volatile store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY34]]
   store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
   store volatile <4 x i32> %arg1, <4 x i32> addrspace(1)* undef
   store volatile <4 x float> %arg2, <4 x float> addrspace(1)* undef
@@ -2445,84 +2541,85 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4
 define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 {
   ; CHECK-LABEL: name: void_func_v32i32_v8i32_v8f32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; CHECK:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; CHECK:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; CHECK:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; CHECK:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; CHECK:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; CHECK:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; CHECK:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; CHECK:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; CHECK:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
-  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.15, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.14, align 8, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13
-  ; CHECK:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.13, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12
-  ; CHECK:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.12, align 16, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11
-  ; CHECK:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.11, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10
-  ; CHECK:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.10, align 8, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9
-  ; CHECK:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.9, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
-  ; CHECK:   [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32)
-  ; CHECK:   [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
-  ; CHECK:   [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
-  ; CHECK:   [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
-  ; CHECK:   [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
-  ; CHECK:   [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
-  ; CHECK:   [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
-  ; CHECK:   [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32), [[LOAD16]](s32)
-  ; CHECK:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[COPY32]](p1) :: (volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[COPY33]](p1) :: (volatile store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
-  ; CHECK:   S_SETPC_B64_return [[COPY34]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; CHECK-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; CHECK-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; CHECK-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; CHECK-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; CHECK-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; CHECK-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; CHECK-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; CHECK-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.15, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.14, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.13, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12
+  ; CHECK-NEXT:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.12, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11
+  ; CHECK-NEXT:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.11, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10
+  ; CHECK-NEXT:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.10, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9
+  ; CHECK-NEXT:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.9, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
+  ; CHECK-NEXT:   [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32)
+  ; CHECK-NEXT:   [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
+  ; CHECK-NEXT:   [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
+  ; CHECK-NEXT:   [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
+  ; CHECK-NEXT:   [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
+  ; CHECK-NEXT:   [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+  ; CHECK-NEXT:   [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+  ; CHECK-NEXT:   [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32), [[LOAD16]](s32)
+  ; CHECK-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[COPY32]](p1) :: (volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[COPY33]](p1) :: (volatile store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY34]]
   store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
   store volatile <8 x i32> %arg1, <8 x i32> addrspace(1)* undef
   store volatile <8 x float> %arg2, <8 x float> addrspace(1)* undef
@@ -2532,116 +2629,117 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8
 define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 {
   ; CHECK-LABEL: name: void_func_v32i32_v16i32_v16f32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; CHECK:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; CHECK:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; CHECK:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; CHECK:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; CHECK:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; CHECK:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; CHECK:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; CHECK:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; CHECK:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.32
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.32, align 16, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
-  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.31, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.30, align 8, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.29
-  ; CHECK:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.29, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.28
-  ; CHECK:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.28, align 16, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.27
-  ; CHECK:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.27, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.26
-  ; CHECK:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.26, align 8, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.25
-  ; CHECK:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.25, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.24
-  ; CHECK:   [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.24, align 16, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.23
-  ; CHECK:   [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.23, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.22
-  ; CHECK:   [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.22, align 8, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.21
-  ; CHECK:   [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.21, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.20
-  ; CHECK:   [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.20, align 16, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.19
-  ; CHECK:   [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.19, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.18
-  ; CHECK:   [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.18, align 8, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.17
-  ; CHECK:   [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.17, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16
-  ; CHECK:   [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32), [[LOAD16]](s32)
-  ; CHECK:   [[FRAME_INDEX17:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15
-  ; CHECK:   [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load (s32) from %fixed-stack.15, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX18:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14
-  ; CHECK:   [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load (s32) from %fixed-stack.14, align 8, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX19:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13
-  ; CHECK:   [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load (s32) from %fixed-stack.13, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX20:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12
-  ; CHECK:   [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load (s32) from %fixed-stack.12, align 16, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX21:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11
-  ; CHECK:   [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load (s32) from %fixed-stack.11, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX22:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10
-  ; CHECK:   [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load (s32) from %fixed-stack.10, align 8, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX23:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9
-  ; CHECK:   [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load (s32) from %fixed-stack.9, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX24:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
-  ; CHECK:   [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX25:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
-  ; CHECK:   [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX26:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
-  ; CHECK:   [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX27:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
-  ; CHECK:   [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX28:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
-  ; CHECK:   [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX29:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
-  ; CHECK:   [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX30:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
-  ; CHECK:   [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX32:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32), [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32), [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32), [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32), [[LOAD32]](s32)
-  ; CHECK:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[COPY32]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[COPY33]](p1) :: (volatile store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
-  ; CHECK:   S_SETPC_B64_return [[COPY34]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; CHECK-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; CHECK-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; CHECK-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; CHECK-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; CHECK-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; CHECK-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; CHECK-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; CHECK-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.32
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.32, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.31, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.30, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.29
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.29, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.28
+  ; CHECK-NEXT:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.28, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.27
+  ; CHECK-NEXT:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.27, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.26
+  ; CHECK-NEXT:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.26, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.25
+  ; CHECK-NEXT:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.25, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.24
+  ; CHECK-NEXT:   [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.24, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.23
+  ; CHECK-NEXT:   [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.23, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.22
+  ; CHECK-NEXT:   [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.22, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.21
+  ; CHECK-NEXT:   [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.21, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.20
+  ; CHECK-NEXT:   [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.20, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.19
+  ; CHECK-NEXT:   [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.19, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.18
+  ; CHECK-NEXT:   [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.18, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.17
+  ; CHECK-NEXT:   [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.17, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16
+  ; CHECK-NEXT:   [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32), [[LOAD16]](s32)
+  ; CHECK-NEXT:   [[FRAME_INDEX17:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15
+  ; CHECK-NEXT:   [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load (s32) from %fixed-stack.15, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX18:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14
+  ; CHECK-NEXT:   [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load (s32) from %fixed-stack.14, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX19:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13
+  ; CHECK-NEXT:   [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load (s32) from %fixed-stack.13, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX20:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12
+  ; CHECK-NEXT:   [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load (s32) from %fixed-stack.12, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX21:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11
+  ; CHECK-NEXT:   [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load (s32) from %fixed-stack.11, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX22:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10
+  ; CHECK-NEXT:   [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load (s32) from %fixed-stack.10, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX23:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9
+  ; CHECK-NEXT:   [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load (s32) from %fixed-stack.9, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX24:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
+  ; CHECK-NEXT:   [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX25:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
+  ; CHECK-NEXT:   [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX26:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
+  ; CHECK-NEXT:   [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX27:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
+  ; CHECK-NEXT:   [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX28:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
+  ; CHECK-NEXT:   [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX29:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+  ; CHECK-NEXT:   [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX30:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+  ; CHECK-NEXT:   [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX32:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32), [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32), [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32), [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32), [[LOAD32]](s32)
+  ; CHECK-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[COPY32]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[COPY33]](p1) :: (volatile store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY34]]
   store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
   store volatile <16 x i32> %arg1, <16 x i32> addrspace(1)* undef
   store volatile <16 x float> %arg2, <16 x float> addrspace(1)* undef
@@ -2652,27 +2750,28 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1,
 define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 {
   ; CHECK-LABEL: name: void_func_v3f32_wasted_reg
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(p3) = COPY [[DEF]](p3)
-  ; CHECK:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32)
-  ; CHECK:   [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32)
-  ; CHECK:   [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32)
-  ; CHECK:   G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3)
-  ; CHECK:   G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3)
-  ; CHECK:   G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3)
-  ; CHECK:   G_STORE [[COPY3]](s32), [[COPY5]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
-  ; CHECK:   [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY6]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(p3) = COPY [[DEF]](p3)
+  ; CHECK-NEXT:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32)
+  ; CHECK-NEXT:   [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32)
+  ; CHECK-NEXT:   [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32)
+  ; CHECK-NEXT:   G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3)
+  ; CHECK-NEXT:   G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3)
+  ; CHECK-NEXT:   G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3)
+  ; CHECK-NEXT:   G_STORE [[COPY3]](s32), [[COPY5]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY6]]
   %arg0.0 = extractelement <3 x float> %arg0, i32 0
   %arg0.1 = extractelement <3 x float> %arg0, i32 1
   %arg0.2 = extractelement <3 x float> %arg0, i32 2
@@ -2686,26 +2785,27 @@ define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 {
 define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 {
   ; CHECK-LABEL: name: void_func_v3i32_wasted_reg
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
-  ; CHECK:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32)
-  ; CHECK:   [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32)
-  ; CHECK:   [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32)
-  ; CHECK:   G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
-  ; CHECK:   G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
-  ; CHECK:   G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
-  ; CHECK:   G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32)
+  ; CHECK-NEXT:   [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32)
+  ; CHECK-NEXT:   [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32)
+  ; CHECK-NEXT:   G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
+  ; CHECK-NEXT:   G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
+  ; CHECK-NEXT:   G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
+  ; CHECK-NEXT:   G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]]
   %arg0.0 = extractelement <3 x i32> %arg0, i32 0
   %arg0.1 = extractelement <3 x i32> %arg0, i32 1
   %arg0.2 = extractelement <3 x i32> %arg0, i32 2
@@ -2720,46 +2820,47 @@ define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 {
 define void @void_func_v16i8(<16 x i8> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v16i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32)
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32)
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32)
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32)
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32)
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK:   [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK:   [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK:   [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK:   [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK:   [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK:   [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16), [[TRUNC12]](s16), [[TRUNC13]](s16), [[TRUNC14]](s16), [[TRUNC15]](s16)
-  ; CHECK:   [[TRUNC16:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s16>)
-  ; CHECK:   [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[TRUNC16]](<16 x s8>), [[DEF]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]]
-  ; CHECK:   S_SETPC_B64_return [[COPY17]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32)
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32)
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32)
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32)
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT:   [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT:   [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT:   [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT:   [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT:   [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT:   [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16), [[TRUNC12]](s16), [[TRUNC13]](s16), [[TRUNC14]](s16), [[TRUNC15]](s16)
+  ; CHECK-NEXT:   [[TRUNC16:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s16>)
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[TRUNC16]](<16 x s8>), [[DEF]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY17]]
   store volatile <16 x i8> %arg0, <16 x i8> addrspace(1)* undef
   ret void
 }
@@ -2768,82 +2869,83 @@ define void @void_func_v16i8(<16 x i8> %arg0) #0 {
 define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
   ; CHECK-LABEL: name: void_func_v32i32_v16i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; CHECK:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; CHECK:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; CHECK:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; CHECK:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; CHECK:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; CHECK:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; CHECK:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; CHECK:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; CHECK:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; CHECK:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
-  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s16) from %fixed-stack.15, align 4, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s16) from %fixed-stack.14, align 8, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13
-  ; CHECK:   [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s16) from %fixed-stack.13, align 4, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12
-  ; CHECK:   [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s16) from %fixed-stack.12, align 16, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11
-  ; CHECK:   [[LOAD5:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s16) from %fixed-stack.11, align 4, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10
-  ; CHECK:   [[LOAD6:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s16) from %fixed-stack.10, align 8, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9
-  ; CHECK:   [[LOAD7:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s16) from %fixed-stack.9, align 4, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
-  ; CHECK:   [[LOAD8:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s16) from %fixed-stack.8, align 16, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
-  ; CHECK:   [[LOAD9:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s16) from %fixed-stack.7, align 4, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
-  ; CHECK:   [[LOAD10:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s16) from %fixed-stack.6, align 8, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
-  ; CHECK:   [[LOAD11:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s16) from %fixed-stack.5, align 4, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
-  ; CHECK:   [[LOAD12:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s16) from %fixed-stack.4, align 16, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
-  ; CHECK:   [[LOAD13:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s16) from %fixed-stack.3, align 4, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
-  ; CHECK:   [[LOAD14:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s16) from %fixed-stack.2, align 8, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[LOAD15:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s16) from %fixed-stack.1, align 4, addrspace 5)
-  ; CHECK:   [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD16:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s16) from %fixed-stack.0, align 16, addrspace 5)
-  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[LOAD1]](s16), [[LOAD2]](s16), [[LOAD3]](s16), [[LOAD4]](s16), [[LOAD5]](s16), [[LOAD6]](s16), [[LOAD7]](s16), [[LOAD8]](s16), [[LOAD9]](s16), [[LOAD10]](s16), [[LOAD11]](s16), [[LOAD12]](s16), [[LOAD13]](s16), [[LOAD14]](s16), [[LOAD15]](s16), [[LOAD16]](s16)
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<16 x s16>)
-  ; CHECK:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[TRUNC]](<16 x s8>), [[COPY32]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
-  ; CHECK:   S_SETPC_B64_return [[COPY33]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; CHECK-NEXT:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; CHECK-NEXT:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; CHECK-NEXT:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; CHECK-NEXT:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; CHECK-NEXT:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; CHECK-NEXT:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; CHECK-NEXT:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; CHECK-NEXT:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s16) from %fixed-stack.15, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s16) from %fixed-stack.14, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s16) from %fixed-stack.13, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12
+  ; CHECK-NEXT:   [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s16) from %fixed-stack.12, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11
+  ; CHECK-NEXT:   [[LOAD5:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s16) from %fixed-stack.11, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10
+  ; CHECK-NEXT:   [[LOAD6:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s16) from %fixed-stack.10, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9
+  ; CHECK-NEXT:   [[LOAD7:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s16) from %fixed-stack.9, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
+  ; CHECK-NEXT:   [[LOAD8:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s16) from %fixed-stack.8, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
+  ; CHECK-NEXT:   [[LOAD9:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s16) from %fixed-stack.7, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
+  ; CHECK-NEXT:   [[LOAD10:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s16) from %fixed-stack.6, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
+  ; CHECK-NEXT:   [[LOAD11:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s16) from %fixed-stack.5, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
+  ; CHECK-NEXT:   [[LOAD12:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s16) from %fixed-stack.4, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+  ; CHECK-NEXT:   [[LOAD13:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s16) from %fixed-stack.3, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+  ; CHECK-NEXT:   [[LOAD14:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s16) from %fixed-stack.2, align 8, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+  ; CHECK-NEXT:   [[LOAD15:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s16) from %fixed-stack.1, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT:   [[LOAD16:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s16) from %fixed-stack.0, align 16, addrspace 5)
+  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[LOAD1]](s16), [[LOAD2]](s16), [[LOAD3]](s16), [[LOAD4]](s16), [[LOAD5]](s16), [[LOAD6]](s16), [[LOAD7]](s16), [[LOAD8]](s16), [[LOAD9]](s16), [[LOAD10]](s16), [[LOAD11]](s16), [[LOAD12]](s16), [[LOAD13]](s16), [[LOAD14]](s16), [[LOAD15]](s16), [[LOAD16]](s16)
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<16 x s16>)
+  ; CHECK-NEXT:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[TRUNC]](<16 x s8>), [[COPY32]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY33]]
   store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
   store volatile <16 x i8> %arg1, <16 x i8> addrspace(1)* undef
   ret void
@@ -2852,30 +2954,31 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
 define void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %arg0, i8 %pad, {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1) {
   ; CHECK-LABEL: name: pointer_in_struct_argument
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
-  ; CHECK:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(p3) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK:   [[MV1:%[0-9]+]]:_(p1234) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32)
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-  ; CHECK:   [[COPY8:%[0-9]+]]:_(p1) = COPY [[C]](p1)
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p1) = COPY [[C]](p1)
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p1) = COPY [[C]](p1)
-  ; CHECK:   G_STORE [[COPY]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
-  ; CHECK:   G_STORE [[MV]](p1), [[COPY8]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1)
-  ; CHECK:   G_STORE [[TRUNC1]](s8), [[COPY9]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
-  ; CHECK:   G_STORE [[COPY4]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
-  ; CHECK:   G_STORE [[MV1]](p1234), [[COPY10]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1)
-  ; CHECK:   [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY7]]
-  ; CHECK:   S_SETPC_B64_return [[COPY11]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(p3) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(p1234) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32)
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p1) = COPY [[C]](p1)
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p1) = COPY [[C]](p1)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p1) = COPY [[C]](p1)
+  ; CHECK-NEXT:   G_STORE [[COPY]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[MV]](p1), [[COPY8]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[TRUNC1]](s8), [[COPY9]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[COPY4]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[MV1]](p1234), [[COPY10]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1)
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY7]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY11]]
   %val0 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 0
   %val1 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 1
   %val2 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 0
@@ -2891,25 +2994,26 @@ define void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %ar
 define void @vector_ptr_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg) {
   ; CHECK-LABEL: name: vector_ptr_in_struct_arg
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1)
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(p3) = COPY $vgpr4
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(p3) = COPY $vgpr5
-  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY4]](p3), [[COPY5]](p3)
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64)
-  ; CHECK:   G_STORE [[BUILD_VECTOR1]](<2 x p3>), [[PTR_ADD]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1)
-  ; CHECK:   [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
-  ; CHECK:   S_SETPC_B64_return [[COPY7]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(p3) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(p3) = COPY $vgpr5
+  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY4]](p3), [[COPY5]](p3)
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64)
+  ; CHECK-NEXT:   G_STORE [[BUILD_VECTOR1]](<2 x p3>), [[PTR_ADD]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1)
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY7]]
   store { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg, { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll
index 78a45678db18..ec903d346288 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll
@@ -6,17 +6,18 @@
 define void @test_memcpy_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) {
   ; CHECK-LABEL: name: test_memcpy_p1_p3_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
-  ; CHECK:   G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
+  ; CHECK-NEXT:   G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   call void @llvm.memcpy.p1i8.p3i8.i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i64 256, i1 false)
   ret void
 }
@@ -24,16 +25,17 @@ define void @test_memcpy_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src)
 define void @test_memcpy_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) {
   ; CHECK-LABEL: name: test_memcpy_p1_p3_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
-  ; CHECK:   G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
+  ; CHECK-NEXT:   G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 256, i1 false)
   ret void
 }
@@ -41,17 +43,18 @@ define void @test_memcpy_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src)
 define void @test_memcpy_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) {
   ; CHECK-LABEL: name: test_memcpy_p1_p3_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
-  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
-  ; CHECK:   G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
+  ; CHECK-NEXT:   G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   call void @llvm.memcpy.p1i8.p3i8.i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i16 256, i1 false)
   ret void
 }
@@ -59,17 +62,18 @@ define void @test_memcpy_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src)
 define void @test_memcpy_p3_p1_i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) {
   ; CHECK-LABEL: name: test_memcpy_p3_p1_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
-  ; CHECK:   G_MEMCPY [[COPY]](p3), [[MV]](p1), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
+  ; CHECK-NEXT:   G_MEMCPY [[COPY]](p3), [[MV]](p1), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i64 256, i1 false)
   ret void
 }
@@ -77,16 +81,17 @@ define void @test_memcpy_p3_p1_i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src)
 define void @test_memcpy_p3_p1_i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) {
   ; CHECK-LABEL: name: test_memcpy_p3_p1_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
-  ; CHECK:   G_MEMCPY [[COPY]](p3), [[MV]](p1), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
+  ; CHECK-NEXT:   G_MEMCPY [[COPY]](p3), [[MV]](p1), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 256, i1 false)
   ret void
 }
@@ -94,17 +99,18 @@ define void @test_memcpy_p3_p1_i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src)
 define void @test_memcpy_p3_p1_i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) {
   ; CHECK-LABEL: name: test_memcpy_p3_p1_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
-  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
-  ; CHECK:   G_MEMCPY [[COPY]](p3), [[MV]](p1), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
+  ; CHECK-NEXT:   G_MEMCPY [[COPY]](p3), [[MV]](p1), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   call void @llvm.memcpy.p3i8.p1i8.i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i16 256, i1 false)
   ret void
 }
@@ -112,17 +118,18 @@ define void @test_memcpy_p3_p1_i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src)
 define void @test_memmove_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) {
   ; CHECK-LABEL: name: test_memmove_p1_p3_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
-  ; CHECK:   G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
+  ; CHECK-NEXT:   G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   call void @llvm.memmove.p1i8.p3i8.i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i64 256, i1 false)
   ret void
 }
@@ -130,16 +137,17 @@ define void @test_memmove_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src
 define void @test_memmove_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) {
   ; CHECK-LABEL: name: test_memmove_p1_p3_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
-  ; CHECK:   G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
+  ; CHECK-NEXT:   G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 256, i1 false)
   ret void
 }
@@ -147,17 +155,18 @@ define void @test_memmove_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src
 define void @test_memmove_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) {
   ; CHECK-LABEL: name: test_memmove_p1_p3_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
-  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
-  ; CHECK:   G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
+  ; CHECK-NEXT:   G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   call void @llvm.memmove.p1i8.p3i8.i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i16 256, i1 false)
   ret void
 }
@@ -165,17 +174,18 @@ define void @test_memmove_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src
 define void @test_memset_p1_i64(i8 addrspace(1)* %dst, i8 %val) {
   ; CHECK-LABEL: name: test_memset_p1_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
-  ; CHECK:   G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
+  ; CHECK-NEXT:   G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 256, i1 false)
   ret void
 }
@@ -183,18 +193,19 @@ define void @test_memset_p1_i64(i8 addrspace(1)* %dst, i8 %val) {
 define void @test_memset_p1_i32(i8 addrspace(1)* %dst, i8 %val) {
   ; CHECK-LABEL: name: test_memset_p1_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
-  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32)
-  ; CHECK:   G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32)
+  ; CHECK-NEXT:   G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   call void @llvm.memset.p1i8.i32(i8 addrspace(1)* %dst, i8 %val, i32 256, i1 false)
   ret void
 }
@@ -202,18 +213,19 @@ define void @test_memset_p1_i32(i8 addrspace(1)* %dst, i8 %val) {
 define void @test_memset_p1_i16(i8 addrspace(1)* %dst, i8 %val) {
   ; CHECK-LABEL: name: test_memset_p1_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
-  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s16)
-  ; CHECK:   G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s16)
+  ; CHECK-NEXT:   G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]]
   call void @llvm.memset.p1i8.i16(i8 addrspace(1)* %dst, i8 %val, i16 256, i1 false)
   ret void
 }
@@ -221,16 +233,17 @@ define void @test_memset_p1_i16(i8 addrspace(1)* %dst, i8 %val) {
 define void @test_memset_p3_i64(i8 addrspace(3)* %dst, i8 %val) {
   ; CHECK-LABEL: name: test_memset_p3_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
-  ; CHECK:   [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
-  ; CHECK:   G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[TRUNC1]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
+  ; CHECK-NEXT:   G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[TRUNC1]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %dst, i8 %val, i64 256, i1 false)
   ret void
 }
@@ -238,15 +251,16 @@ define void @test_memset_p3_i64(i8 addrspace(3)* %dst, i8 %val) {
 define void @test_memset_p3_i32(i8 addrspace(3)* %dst, i8 %val) {
   ; CHECK-LABEL: name: test_memset_p3_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
-  ; CHECK:   G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
+  ; CHECK-NEXT:   G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   call void @llvm.memset.p3i8.i32(i8 addrspace(3)* %dst, i8 %val, i32 256, i1 false)
   ret void
 }
@@ -254,16 +268,17 @@ define void @test_memset_p3_i32(i8 addrspace(3)* %dst, i8 %val) {
 define void @test_memset_p3_i16(i8 addrspace(3)* %dst, i8 %val) {
   ; CHECK-LABEL: name: test_memset_p3_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
-  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
-  ; CHECK:   G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
+  ; CHECK-NEXT:   G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
   call void @llvm.memset.p3i8.i16(i8 addrspace(3)* %dst, i8 %val, i16 256, i1 false)
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll
index 3c8ca3d35749..a88c4224a2fa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll
@@ -4,20 +4,21 @@
 define i8* @ptrmask_flat_i64(i8* %ptr, i64 %mask) {
   ; CHECK-LABEL: name: ptrmask_flat_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[MV1]](s64)
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[MV1]](s64)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
   %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %ptr, i64 %mask)
   ret i8* %masked
 }
@@ -25,18 +26,19 @@ define i8* @ptrmask_flat_i64(i8* %ptr, i64 %mask) {
 define i8* @ptrmask_flat_i32(i8* %ptr, i32 %mask) {
   ; CHECK-LABEL: name: ptrmask_flat_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[COPY2]](s32)
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[COPY2]](s32)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]], implicit $vgpr0, implicit $vgpr1
   %masked = call i8* @llvm.ptrmask.p0i8.i32(i8* %ptr, i32 %mask)
   ret i8* %masked
 }
@@ -44,19 +46,20 @@ define i8* @ptrmask_flat_i32(i8* %ptr, i32 %mask) {
 define i8* @ptrmask_flat_i16(i8* %ptr, i16 %mask) {
   ; CHECK-LABEL: name: ptrmask_flat_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s16)
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s16)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]], implicit $vgpr0, implicit $vgpr1
   %masked = call i8* @llvm.ptrmask.p0i8.i16(i8* %ptr, i16 %mask)
   ret i8* %masked
 }
@@ -64,19 +67,20 @@ define i8* @ptrmask_flat_i16(i8* %ptr, i16 %mask) {
 define i8* @ptrmask_flat_i1(i8* %ptr, i1 %mask) {
   ; CHECK-LABEL: name: ptrmask_flat_i1
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s1)
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s1)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]], implicit $vgpr0, implicit $vgpr1
   %masked = call i8* @llvm.ptrmask.p0i8.i1(i8* %ptr, i1 %mask)
   ret i8* %masked
 }
@@ -84,16 +88,17 @@ define i8* @ptrmask_flat_i1(i8* %ptr, i1 %mask) {
 define i8 addrspace(3)* @ptrmask_local_i64(i8 addrspace(3)* %ptr, i64 %mask) {
   ; CHECK-LABEL: name: ptrmask_local_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[MV]](s64)
-  ; CHECK:   $vgpr0 = COPY [[PTRMASK]](p3)
-  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK:   S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[MV]](s64)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[PTRMASK]](p3)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY4]], implicit $vgpr0
   %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* %ptr, i64 %mask)
   ret i8 addrspace(3)* %masked
 }
@@ -101,14 +106,15 @@ define i8 addrspace(3)* @ptrmask_local_i64(i8 addrspace(3)* %ptr, i64 %mask) {
 define i8 addrspace(3)* @ptrmask_local_i32(i8 addrspace(3)* %ptr, i32 %mask) {
   ; CHECK-LABEL: name: ptrmask_local_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[COPY1]](s32)
-  ; CHECK:   $vgpr0 = COPY [[PTRMASK]](p3)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[COPY1]](s32)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[PTRMASK]](p3)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
   %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* %ptr, i32 %mask)
   ret i8 addrspace(3)* %masked
 }
@@ -116,15 +122,16 @@ define i8 addrspace(3)* @ptrmask_local_i32(i8 addrspace(3)* %ptr, i32 %mask) {
 define i8 addrspace(3)* @ptrmask_local_i16(i8 addrspace(3)* %ptr, i16 %mask) {
   ; CHECK-LABEL: name: ptrmask_local_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s16)
-  ; CHECK:   $vgpr0 = COPY [[PTRMASK]](p3)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s16)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[PTRMASK]](p3)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
   %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)* %ptr, i16 %mask)
   ret i8 addrspace(3)* %masked
 }
@@ -132,15 +139,16 @@ define i8 addrspace(3)* @ptrmask_local_i16(i8 addrspace(3)* %ptr, i16 %mask) {
 define i8 addrspace(3)* @ptrmask_local_i1(i8 addrspace(3)* %ptr, i1 %mask) {
   ; CHECK-LABEL: name: ptrmask_local_i1
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s1)
-  ; CHECK:   $vgpr0 = COPY [[PTRMASK]](p3)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s1)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[PTRMASK]](p3)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
   %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i1(i8 addrspace(3)* %ptr, i1 %mask)
   ret i8 addrspace(3)* %masked
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll
index ada7cf54f544..0db076c6b0fc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll
@@ -4,17 +4,18 @@
 define i16 @uaddsat_i16(i16 %lhs, i16 %rhs) {
   ; CHECK-LABEL: name: uaddsat_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]]
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16)
-  ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]]
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
   %res = call i16 @llvm.uadd.sat.i16(i16 %lhs, i16 %rhs)
   ret i16 %res
 }
@@ -23,14 +24,15 @@ declare i16 @llvm.uadd.sat.i16(i16, i16)
 define i32 @uaddsat_i32(i32 %lhs, i32 %rhs) {
   ; CHECK-LABEL: name: uaddsat_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]]
-  ; CHECK:   $vgpr0 = COPY [[UADDSAT]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UADDSAT]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
   %res = call i32 @llvm.uadd.sat.i32(i32 %lhs, i32 %rhs)
   ret i32 %res
 }
@@ -39,20 +41,21 @@ declare i32 @llvm.uadd.sat.i32(i32, i32)
 define i64 @uaddsat_i64(i64 %lhs, i64 %rhs) {
   ; CHECK-LABEL: name: uaddsat_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[MV]], [[MV1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](s64)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[MV]], [[MV1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](s64)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
   %res = call i64 @llvm.uadd.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %res
 }
@@ -61,20 +64,21 @@ declare i64 @llvm.uadd.sat.i64(i64, i64)
 define <2 x i32> @uaddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   ; CHECK-LABEL: name: uaddsat_v2i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[UADDSAT:%[0-9]+]]:_(<2 x s32>) = G_UADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](<2 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[UADDSAT:%[0-9]+]]:_(<2 x s32>) = G_UADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](<2 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
   %res = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
   ret <2 x i32> %res
 }
@@ -83,17 +87,18 @@ declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>)
 define i16 @saddsat_i16(i16 %lhs, i16 %rhs) {
   ; CHECK-LABEL: name: saddsat_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]]
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16)
-  ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]]
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
   %res = call i16 @llvm.sadd.sat.i16(i16 %lhs, i16 %rhs)
   ret i16 %res
 }
@@ -102,14 +107,15 @@ declare i16 @llvm.sadd.sat.i16(i16, i16)
 define i32 @saddsat_i32(i32 %lhs, i32 %rhs) {
   ; CHECK-LABEL: name: saddsat_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]]
-  ; CHECK:   $vgpr0 = COPY [[SADDSAT]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $vgpr0 = COPY [[SADDSAT]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
   %res = call i32 @llvm.sadd.sat.i32(i32 %lhs, i32 %rhs)
   ret i32 %res
 }
@@ -118,20 +124,21 @@ declare i32 @llvm.sadd.sat.i32(i32, i32)
 define i64 @saddsat_i64(i64 %lhs, i64 %rhs) {
   ; CHECK-LABEL: name: saddsat_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[MV]], [[MV1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](s64)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[MV]], [[MV1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](s64)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
   %res = call i64 @llvm.sadd.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %res
 }
@@ -140,20 +147,21 @@ declare i64 @llvm.sadd.sat.i64(i64, i64)
 define <2 x i32> @saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   ; CHECK-LABEL: name: saddsat_v2i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[SADDSAT:%[0-9]+]]:_(<2 x s32>) = G_SADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](<2 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[SADDSAT:%[0-9]+]]:_(<2 x s32>) = G_SADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](<2 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
   %res = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
   ret <2 x i32> %res
 }
@@ -162,17 +170,18 @@ declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>)
 define i16 @usubsat_i16(i16 %lhs, i16 %rhs) {
   ; CHECK-LABEL: name: usubsat_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]]
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16)
-  ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]]
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
   %res = call i16 @llvm.usub.sat.i16(i16 %lhs, i16 %rhs)
   ret i16 %res
 }
@@ -181,14 +190,15 @@ declare i16 @llvm.usub.sat.i16(i16, i16)
 define i32 @usubsat_i32(i32 %lhs, i32 %rhs) {
   ; CHECK-LABEL: name: usubsat_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]]
-  ; CHECK:   $vgpr0 = COPY [[USUBSAT]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $vgpr0 = COPY [[USUBSAT]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
   %res = call i32 @llvm.usub.sat.i32(i32 %lhs, i32 %rhs)
   ret i32 %res
 }
@@ -197,20 +207,21 @@ declare i32 @llvm.usub.sat.i32(i32, i32)
 define i64 @usubsat_i64(i64 %lhs, i64 %rhs) {
   ; CHECK-LABEL: name: usubsat_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[MV]], [[MV1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](s64)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[MV]], [[MV1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](s64)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
   %res = call i64 @llvm.usub.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %res
 }
@@ -219,20 +230,21 @@ declare i64 @llvm.usub.sat.i64(i64, i64)
 define <2 x i32> @usubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   ; CHECK-LABEL: name: usubsat_v2i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[USUBSAT:%[0-9]+]]:_(<2 x s32>) = G_USUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](<2 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[USUBSAT:%[0-9]+]]:_(<2 x s32>) = G_USUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](<2 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
   %res = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
   ret <2 x i32> %res
 }
@@ -241,17 +253,18 @@ declare <2 x i32> @llvm.usub.sat.v2i32(<2 x i32>, <2 x i32>)
 define i16 @ssubsat_i16(i16 %lhs, i16 %rhs) {
   ; CHECK-LABEL: name: ssubsat_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]]
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16)
-  ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]]
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
   %res = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs)
   ret i16 %res
 }
@@ -260,14 +273,15 @@ declare i16 @llvm.ssub.sat.i16(i16, i16)
 define i32 @ssubsat_i32(i32 %lhs, i32 %rhs) {
   ; CHECK-LABEL: name: ssubsat_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]]
-  ; CHECK:   $vgpr0 = COPY [[SSUBSAT]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $vgpr0 = COPY [[SSUBSAT]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
   %res = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs)
   ret i32 %res
 }
@@ -276,20 +290,21 @@ declare i32 @llvm.ssub.sat.i32(i32, i32)
 define i64 @ssubsat_i64(i64 %lhs, i64 %rhs) {
   ; CHECK-LABEL: name: ssubsat_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[MV]], [[MV1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](s64)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[MV]], [[MV1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](s64)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
   %res = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %res
 }
@@ -298,20 +313,21 @@ declare i64 @llvm.ssub.sat.i64(i64, i64)
 define <2 x i32> @ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   ; CHECK-LABEL: name: ssubsat_v2i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[SSUBSAT:%[0-9]+]]:_(<2 x s32>) = G_SSUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](<2 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[SSUBSAT:%[0-9]+]]:_(<2 x s32>) = G_SSUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](<2 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
   %res = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
   ret <2 x i32> %res
 }
@@ -320,17 +336,18 @@ declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>)
 define i16 @ushlsat_i16(i16 %lhs, i16 %rhs) {
   ; CHECK-LABEL: name: ushlsat_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[USHLSAT:%[0-9]+]]:_(s16) = G_USHLSAT [[TRUNC]], [[TRUNC1]](s16)
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USHLSAT]](s16)
-  ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[USHLSAT:%[0-9]+]]:_(s16) = G_USHLSAT [[TRUNC]], [[TRUNC1]](s16)
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USHLSAT]](s16)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
   %res = call i16 @llvm.ushl.sat.i16(i16 %lhs, i16 %rhs)
   ret i16 %res
 }
@@ -339,14 +356,15 @@ declare i16 @llvm.ushl.sat.i16(i16, i16)
 define i32 @ushlsat_i32(i32 %lhs, i32 %rhs) {
   ; CHECK-LABEL: name: ushlsat_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[COPY1]](s32)
-  ; CHECK:   $vgpr0 = COPY [[USHLSAT]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[COPY1]](s32)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[USHLSAT]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
   %res = call i32 @llvm.ushl.sat.i32(i32 %lhs, i32 %rhs)
   ret i32 %res
 }
@@ -355,20 +373,21 @@ declare i32 @llvm.ushl.sat.i32(i32, i32)
 define i64 @ushlsat_i64(i64 %lhs, i64 %rhs) {
   ; CHECK-LABEL: name: ushlsat_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[USHLSAT:%[0-9]+]]:_(s64) = G_USHLSAT [[MV]], [[MV1]](s64)
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](s64)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[USHLSAT:%[0-9]+]]:_(s64) = G_USHLSAT [[MV]], [[MV1]](s64)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](s64)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
   %res = call i64 @llvm.ushl.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %res
 }
@@ -377,20 +396,21 @@ declare i64 @llvm.ushl.sat.i64(i64, i64)
 define <2 x i32> @ushlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   ; CHECK-LABEL: name: ushlsat_v2i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[USHLSAT:%[0-9]+]]:_(<2 x s32>) = G_USHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>)
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](<2 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[USHLSAT:%[0-9]+]]:_(<2 x s32>) = G_USHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](<2 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
   %res = call <2 x i32> @llvm.ushl.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
   ret <2 x i32> %res
 }
@@ -399,17 +419,18 @@ declare <2 x i32> @llvm.ushl.sat.v2i32(<2 x i32>, <2 x i32>)
 define i16 @sshlsat_i16(i16 %lhs, i16 %rhs) {
   ; CHECK-LABEL: name: sshlsat_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[SSHLSAT:%[0-9]+]]:_(s16) = G_SSHLSAT [[TRUNC]], [[TRUNC1]](s16)
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSHLSAT]](s16)
-  ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[SSHLSAT:%[0-9]+]]:_(s16) = G_SSHLSAT [[TRUNC]], [[TRUNC1]](s16)
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSHLSAT]](s16)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
   %res = call i16 @llvm.sshl.sat.i16(i16 %lhs, i16 %rhs)
   ret i16 %res
 }
@@ -418,14 +439,15 @@ declare i16 @llvm.sshl.sat.i16(i16, i16)
 define i32 @sshlsat_i32(i32 %lhs, i32 %rhs) {
   ; CHECK-LABEL: name: sshlsat_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[COPY1]](s32)
-  ; CHECK:   $vgpr0 = COPY [[SSHLSAT]](s32)
-  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[COPY1]](s32)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[SSHLSAT]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
   %res = call i32 @llvm.sshl.sat.i32(i32 %lhs, i32 %rhs)
   ret i32 %res
 }
@@ -434,20 +456,21 @@ declare i32 @llvm.sshl.sat.i32(i32, i32)
 define i64 @sshlsat_i64(i64 %lhs, i64 %rhs) {
   ; CHECK-LABEL: name: sshlsat_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[SSHLSAT:%[0-9]+]]:_(s64) = G_SSHLSAT [[MV]], [[MV1]](s64)
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](s64)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[SSHLSAT:%[0-9]+]]:_(s64) = G_SSHLSAT [[MV]], [[MV1]](s64)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](s64)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
   %res = call i64 @llvm.sshl.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %res
 }
@@ -456,20 +479,21 @@ declare i64 @llvm.sshl.sat.i64(i64, i64)
 define <2 x i32> @sshlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   ; CHECK-LABEL: name: sshlsat_v2i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[SSHLSAT:%[0-9]+]]:_(<2 x s32>) = G_SSHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>)
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](<2 x s32>)
-  ; CHECK:   $vgpr0 = COPY [[UV]](s32)
-  ; CHECK:   $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[SSHLSAT:%[0-9]+]]:_(<2 x s32>) = G_SSHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>)
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](<2 x s32>)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
   %res = call <2 x i32> @llvm.sshl.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
   ret <2 x i32> %res
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll
index 6f4e66a36fbd..403375d6389f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll
@@ -6,16 +6,17 @@ declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1)
 define amdgpu_ps void @test_div_scale(float %arg0, float %arg1) {
   ; CHECK-LABEL: name: test_div_scale
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), -1
-  ; CHECK:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[INT1]](s1)
-  ; CHECK:   G_STORE [[INT]](s32), [[DEF]](p1) :: (store (s32) into `float addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[SEXT]](s32), [[COPY2]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), -1
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[INT1]](s1)
+  ; CHECK-NEXT:   G_STORE [[INT]](s32), [[DEF]](p1) :: (store (s32) into `float addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   G_STORE [[SEXT]](s32), [[COPY2]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   S_ENDPGM 0
   %call = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %arg0, float %arg1, i1 true)
   %extract0 = extractvalue { float, i1 } %call, 0
   %extract1 = extractvalue { float, i1 } %call, 1

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
index ac1d289e1227..1147647ec514 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
@@ -6,36 +6,37 @@ declare hidden void @external_void_func_void()
 define void @tail_call_void_func_void() {
   ; CHECK-LABEL: name: tail_call_void_func_void
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY13]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY15]](s32)
-  ; CHECK:   $vgpr31 = COPY [[COPY16]](s32)
-  ; CHECK:   SI_TCRETURN [[GV]](p0), @external_void_func_void, 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   SI_TCRETURN [[GV]](p0), @external_void_func_void, 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
   tail call void @external_void_func_void()
   ret void
 }


        


More information about the llvm-commits mailing list